75a70cf9 1/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
5a976006 57#include "sbitmap.h"
58#include "timevar.h"
59#include "df.h"
6352eedf 60
61/* Builtin types, data and prototypes. */
62struct spu_builtin_range
63{
64 int low, high;
65};
66
67static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80};
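/* For reference (derived from the table above): each SPU_BTI_* range matches
   the width of the corresponding immediate field, e.g. SPU_BTI_S10 is a
   signed 10-bit field (-0x200 .. 0x1ff) and SPU_BTI_U18 an unsigned 18-bit
   field (0 .. 0x3ffff).  */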
81
644459d0 82\f
83/* Target specific attribute specifications. */
84char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
85
86/* Prototypes and external defs. */
87static void spu_init_builtins (void);
88static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91static rtx get_pic_reg (void);
92static int need_to_save_reg (int regno, int saving);
93static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97static void emit_nop_for_insn (rtx insn);
98static bool insn_clobbers_hbr (rtx insn);
99static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 100 int distance, sbitmap blocks);
5474166e 101static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
644459d0 103static rtx get_branch_target (rtx branch);
644459d0 104static void spu_machine_dependent_reorg (void);
105static int spu_sched_issue_rate (void);
106static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108static int get_pipe (rtx insn);
644459d0 109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 110static void spu_sched_init_global (FILE *, int, int);
111static void spu_sched_init (FILE *, int, int);
112static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 113static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119static int spu_naked_function_p (tree func);
fb80456a 120static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
644459d0 122static tree spu_build_builtin_va_list (void);
8a58ed0a 123static void spu_va_start (tree, rtx);
75a70cf9 124static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 126static int regno_aligned_for_load (int regno);
127static int store_with_one_insn_p (rtx mem);
644459d0 128static int mem_is_padded_component_ref (rtx x);
129static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130static void spu_asm_globalize_label (FILE * file, const char *name);
131static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 132 int *total, bool speed);
644459d0 133static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134static void spu_init_libfuncs (void);
fb80456a 135static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 136static void fix_range (const char *);
69ced2d6 137static void spu_encode_section_info (tree, rtx, int);
e99f512d 138static tree spu_builtin_mul_widen_even (tree);
139static tree spu_builtin_mul_widen_odd (tree);
a76866d3 140static tree spu_builtin_mask_for_load (void);
a28df51d 141static int spu_builtin_vectorization_cost (bool);
a9f1838b 142static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 143static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 144static int spu_sms_res_mii (struct ddg *g);
5a976006 145static void asm_file_start (void);
644459d0 146
147extern const char *reg_names[];
148rtx spu_compare_op0, spu_compare_op1;
149
5474166e 150/* Which instruction set architecture to use. */
151int spu_arch;
 152/* Which cpu we are tuning for. */
153int spu_tune;
154
5a976006 155/* The hardware requires 8 insns between a hint and the branch it
 156 affects. This variable describes how many rtl instructions the
157 compiler needs to see before inserting a hint, and then the compiler
158 will insert enough nops to make it at least 8 insns. The default is
 159 for the compiler to allow up to 2 nops to be emitted. The nops are
160 inserted in pairs, so we round down. */
161int spu_hint_dist = (8*4) - (2*4);
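/* Illustrative arithmetic (not from the original source): with the default
   of 2 nops, spu_hint_dist is 8*4 - 2*4 = 24 bytes, i.e. the hint must be
   seen at least 6 insns before the branch, and up to 2 nops may then be
   inserted to reach the required 8.  */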
162
163/* Determines whether we run variable tracking in machine dependent
164 reorganization. */
165static int spu_flag_var_tracking;
166
644459d0 167enum spu_immediate {
168 SPU_NONE,
169 SPU_IL,
170 SPU_ILA,
171 SPU_ILH,
172 SPU_ILHU,
173 SPU_ORI,
174 SPU_ORHI,
175 SPU_ORBI,
99369027 176 SPU_IOHL
644459d0 177};
dea01258 178enum immediate_class
179{
180 IC_POOL, /* constant pool */
181 IC_IL1, /* one il* instruction */
182 IC_IL2, /* both ilhu and iohl instructions */
183 IC_IL1s, /* one il* instruction */
184 IC_IL2s, /* both ilhu and iohl instructions */
185 IC_FSMBI, /* the fsmbi instruction */
186 IC_CPAT, /* one of the c*d instructions */
5df189be 187 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 188};
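/* Illustrative examples of the classification (an assumption, not from the
   original source): a SImode constant such as 0x12340000 can be loaded with
   a single ilhu and would be IC_IL1, while 0x12345678 needs an ilhu/iohl
   pair and would be IC_IL2.  */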
644459d0 189
190static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
191static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 192static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
193static enum immediate_class classify_immediate (rtx op,
194 enum machine_mode mode);
644459d0 195
1bd43494 196static enum machine_mode spu_unwind_word_mode (void);
197
ea32e033 198static enum machine_mode
199spu_libgcc_cmp_return_mode (void);
200
201static enum machine_mode
202spu_libgcc_shift_count_mode (void);
203
644459d0 204/* Built in types. */
205tree spu_builtin_types[SPU_BTI_MAX];
206\f
207/* TARGET overrides. */
208
209#undef TARGET_INIT_BUILTINS
210#define TARGET_INIT_BUILTINS spu_init_builtins
211
644459d0 212#undef TARGET_EXPAND_BUILTIN
213#define TARGET_EXPAND_BUILTIN spu_expand_builtin
214
1bd43494 215#undef TARGET_UNWIND_WORD_MODE
216#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 217
218/* The .8byte directive doesn't seem to work well for a 32 bit
219 architecture. */
220#undef TARGET_ASM_UNALIGNED_DI_OP
221#define TARGET_ASM_UNALIGNED_DI_OP NULL
222
223#undef TARGET_RTX_COSTS
224#define TARGET_RTX_COSTS spu_rtx_costs
225
226#undef TARGET_ADDRESS_COST
f529eb25 227#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 228
229#undef TARGET_SCHED_ISSUE_RATE
230#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
231
5a976006 232#undef TARGET_SCHED_INIT_GLOBAL
233#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
234
235#undef TARGET_SCHED_INIT
236#define TARGET_SCHED_INIT spu_sched_init
237
644459d0 238#undef TARGET_SCHED_VARIABLE_ISSUE
239#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
240
5a976006 241#undef TARGET_SCHED_REORDER
242#define TARGET_SCHED_REORDER spu_sched_reorder
243
244#undef TARGET_SCHED_REORDER2
245#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 246
247#undef TARGET_SCHED_ADJUST_COST
248#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
249
250const struct attribute_spec spu_attribute_table[];
251#undef TARGET_ATTRIBUTE_TABLE
252#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
253
254#undef TARGET_ASM_INTEGER
255#define TARGET_ASM_INTEGER spu_assemble_integer
256
257#undef TARGET_SCALAR_MODE_SUPPORTED_P
258#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
259
260#undef TARGET_VECTOR_MODE_SUPPORTED_P
261#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
262
263#undef TARGET_FUNCTION_OK_FOR_SIBCALL
264#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
265
266#undef TARGET_ASM_GLOBALIZE_LABEL
267#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
268
269#undef TARGET_PASS_BY_REFERENCE
270#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
271
272#undef TARGET_MUST_PASS_IN_STACK
273#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
274
275#undef TARGET_BUILD_BUILTIN_VA_LIST
276#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
277
8a58ed0a 278#undef TARGET_EXPAND_BUILTIN_VA_START
279#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
280
644459d0 281#undef TARGET_SETUP_INCOMING_VARARGS
282#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
283
284#undef TARGET_MACHINE_DEPENDENT_REORG
285#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
286
287#undef TARGET_GIMPLIFY_VA_ARG_EXPR
288#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
289
290#undef TARGET_DEFAULT_TARGET_FLAGS
291#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
292
293#undef TARGET_INIT_LIBFUNCS
294#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
295
296#undef TARGET_RETURN_IN_MEMORY
297#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
298
69ced2d6 299#undef TARGET_ENCODE_SECTION_INFO
300#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
301
e99f512d 302#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
303#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
304
305#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
306#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
307
a76866d3 308#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
309#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
310
a28df51d 311#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
312#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
313
0e87db76 314#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
315#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
316
a0515226 317#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
318#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
319
ea32e033 320#undef TARGET_LIBGCC_CMP_RETURN_MODE
321#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
322
323#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
324#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
325
d52fd16a 326#undef TARGET_SCHED_SMS_RES_MII
327#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
328
5a976006 329#undef TARGET_ASM_FILE_START
330#define TARGET_ASM_FILE_START asm_file_start
331
644459d0 332struct gcc_target targetm = TARGET_INITIALIZER;
333
5df189be 334void
335spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
336{
5df189be 337 /* Override some of the default param values. With so many registers,
338 larger values are better for these params. */
339 MAX_PENDING_LIST_LENGTH = 128;
340
 341 /* With so many registers, this is better on by default. */
342 flag_rename_registers = 1;
343}
344
644459d0 345/* Sometimes certain combinations of command options do not make sense
346 on a particular target machine. You can define a macro
347 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
348 executed once just after all the command options have been parsed. */
349void
350spu_override_options (void)
351{
14d408d9 352 /* Small loops will be completely unrolled at -O3. For SPU it is more important
353 to keep code small by default. */
354 if (!flag_unroll_loops && !flag_peel_loops
355 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
356 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
357
644459d0 358 flag_omit_frame_pointer = 1;
359
5a976006 360 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 361 if (align_functions < 8)
362 align_functions = 8;
c7b91b14 363
5a976006 364 spu_hint_dist = 8*4 - spu_max_nops*4;
365 if (spu_hint_dist < 0)
366 spu_hint_dist = 0;
367
c7b91b14 368 if (spu_fixed_range_string)
369 fix_range (spu_fixed_range_string);
5474166e 370
371 /* Determine processor architectural level. */
372 if (spu_arch_string)
373 {
374 if (strcmp (&spu_arch_string[0], "cell") == 0)
375 spu_arch = PROCESSOR_CELL;
376 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
377 spu_arch = PROCESSOR_CELLEDP;
378 else
379 error ("Unknown architecture '%s'", &spu_arch_string[0]);
380 }
381
382 /* Determine processor to tune for. */
383 if (spu_tune_string)
384 {
385 if (strcmp (&spu_tune_string[0], "cell") == 0)
386 spu_tune = PROCESSOR_CELL;
387 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
388 spu_tune = PROCESSOR_CELLEDP;
389 else
390 error ("Unknown architecture '%s'", &spu_tune_string[0]);
391 }
98bbec1e 392
13684256 393 /* Change defaults according to the processor architecture. */
394 if (spu_arch == PROCESSOR_CELLEDP)
395 {
396 /* If no command line option has been otherwise specified, change
397 the default to -mno-safe-hints on celledp -- only the original
398 Cell/B.E. processors require this workaround. */
399 if (!(target_flags_explicit & MASK_SAFE_HINTS))
400 target_flags &= ~MASK_SAFE_HINTS;
401 }
402
98bbec1e 403 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 404}
405\f
406/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
407 struct attribute_spec.handler. */
408
409/* Table of machine attributes. */
410const struct attribute_spec spu_attribute_table[] =
411{
412 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
413 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
414 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
415 { NULL, 0, 0, false, false, false, NULL }
416};
417
418/* True if MODE is valid for the target. By "valid", we mean able to
419 be manipulated in non-trivial ways. In particular, this means all
420 the arithmetic is supported. */
421static bool
422spu_scalar_mode_supported_p (enum machine_mode mode)
423{
424 switch (mode)
425 {
426 case QImode:
427 case HImode:
428 case SImode:
429 case SFmode:
430 case DImode:
431 case TImode:
432 case DFmode:
433 return true;
434
435 default:
436 return false;
437 }
438}
439
440/* Similarly for vector modes. "Supported" here is less strict. At
441 least some operations are supported; need to check optabs or builtins
442 for further details. */
443static bool
444spu_vector_mode_supported_p (enum machine_mode mode)
445{
446 switch (mode)
447 {
448 case V16QImode:
449 case V8HImode:
450 case V4SImode:
451 case V2DImode:
452 case V4SFmode:
453 case V2DFmode:
454 return true;
455
456 default:
457 return false;
458 }
459}
460
461/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
462 least significant bytes of the outer mode. This function returns
463 TRUE for the SUBREG's where this is correct. */
464int
465valid_subreg (rtx op)
466{
467 enum machine_mode om = GET_MODE (op);
468 enum machine_mode im = GET_MODE (SUBREG_REG (op));
469 return om != VOIDmode && im != VOIDmode
470 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 471 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
472 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 473}
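/* Worked examples of the rule above (illustrative): (subreg:SI (reg:QI) 0)
   is valid because both modes are at most 4 bytes, (subreg:V4SI (reg:TI) 0)
   is valid because both are 16 bytes, and (subreg:DI (reg:SI) 0) is rejected
   because the sizes are 8 and 4 bytes.  */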
474
 475/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 476 and adjust the start offset. */
644459d0 477static rtx
478adjust_operand (rtx op, HOST_WIDE_INT * start)
479{
480 enum machine_mode mode;
481 int op_size;
38aca5eb 482 /* Strip any paradoxical SUBREG. */
483 if (GET_CODE (op) == SUBREG
484 && (GET_MODE_BITSIZE (GET_MODE (op))
485 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 486 {
487 if (start)
488 *start -=
489 GET_MODE_BITSIZE (GET_MODE (op)) -
490 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
491 op = SUBREG_REG (op);
492 }
 493 /* If it is smaller than SI, widen it to SI via a SUBREG. */
494 op_size = GET_MODE_BITSIZE (GET_MODE (op));
495 if (op_size < 32)
496 {
497 if (start)
498 *start += 32 - op_size;
499 op_size = 32;
500 }
501 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
502 mode = mode_for_size (op_size, MODE_INT, 0);
503 if (mode != GET_MODE (op))
504 op = gen_rtx_SUBREG (mode, op, 0);
505 return op;
506}
507
508void
509spu_expand_extv (rtx ops[], int unsignedp)
510{
511 HOST_WIDE_INT width = INTVAL (ops[2]);
512 HOST_WIDE_INT start = INTVAL (ops[3]);
513 HOST_WIDE_INT src_size, dst_size;
514 enum machine_mode src_mode, dst_mode;
515 rtx dst = ops[0], src = ops[1];
516 rtx s;
517
518 dst = adjust_operand (ops[0], 0);
519 dst_mode = GET_MODE (dst);
520 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
521
644459d0 522 src = adjust_operand (src, &start);
523 src_mode = GET_MODE (src);
524 src_size = GET_MODE_BITSIZE (GET_MODE (src));
525
526 if (start > 0)
527 {
528 s = gen_reg_rtx (src_mode);
529 switch (src_mode)
530 {
531 case SImode:
532 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
533 break;
534 case DImode:
535 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
536 break;
537 case TImode:
538 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
539 break;
540 default:
541 abort ();
542 }
543 src = s;
544 }
545
546 if (width < src_size)
547 {
548 rtx pat;
549 int icode;
550 switch (src_mode)
551 {
552 case SImode:
553 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
554 break;
555 case DImode:
556 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
557 break;
558 case TImode:
559 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
560 break;
561 default:
562 abort ();
563 }
564 s = gen_reg_rtx (src_mode);
565 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
566 emit_insn (pat);
567 src = s;
568 }
569
570 convert_move (dst, src, unsignedp);
571}
572
573void
574spu_expand_insv (rtx ops[])
575{
576 HOST_WIDE_INT width = INTVAL (ops[1]);
577 HOST_WIDE_INT start = INTVAL (ops[2]);
578 HOST_WIDE_INT maskbits;
579 enum machine_mode dst_mode, src_mode;
580 rtx dst = ops[0], src = ops[3];
581 int dst_size, src_size;
582 rtx mask;
583 rtx shift_reg;
584 int shift;
585
586
587 if (GET_CODE (ops[0]) == MEM)
588 dst = gen_reg_rtx (TImode);
589 else
590 dst = adjust_operand (dst, &start);
591 dst_mode = GET_MODE (dst);
592 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
593
594 if (CONSTANT_P (src))
595 {
596 enum machine_mode m =
597 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
598 src = force_reg (m, convert_to_mode (m, src, 0));
599 }
600 src = adjust_operand (src, 0);
601 src_mode = GET_MODE (src);
602 src_size = GET_MODE_BITSIZE (GET_MODE (src));
603
604 mask = gen_reg_rtx (dst_mode);
605 shift_reg = gen_reg_rtx (dst_mode);
606 shift = dst_size - start - width;
607
608 /* It's not safe to use subreg here because the compiler assumes
609 that the SUBREG_REG is right justified in the SUBREG. */
610 convert_move (shift_reg, src, 1);
611
612 if (shift > 0)
613 {
614 switch (dst_mode)
615 {
616 case SImode:
617 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
618 break;
619 case DImode:
620 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
621 break;
622 case TImode:
623 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
624 break;
625 default:
626 abort ();
627 }
628 }
629 else if (shift < 0)
630 abort ();
631
632 switch (dst_size)
633 {
634 case 32:
635 maskbits = (-1ll << (32 - width - start));
636 if (start)
637 maskbits += (1ll << (32 - start));
638 emit_move_insn (mask, GEN_INT (maskbits));
639 break;
640 case 64:
641 maskbits = (-1ll << (64 - width - start));
642 if (start)
643 maskbits += (1ll << (64 - start));
644 emit_move_insn (mask, GEN_INT (maskbits));
645 break;
646 case 128:
647 {
648 unsigned char arr[16];
649 int i = start / 8;
650 memset (arr, 0, sizeof (arr));
651 arr[i] = 0xff >> (start & 7);
652 for (i++; i <= (start + width - 1) / 8; i++)
653 arr[i] = 0xff;
654 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
655 emit_move_insn (mask, array_to_constant (TImode, arr));
656 }
657 break;
658 default:
659 abort ();
660 }
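  /* Worked example of the 32-bit case above (illustrative): with start == 8
     and width == 8, maskbits is (-1ll << 16) + (1ll << 24) == 0x00ff0000,
     i.e. a mask over bits 23..16, since START counts down from the most
     significant bit.  */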
661 if (GET_CODE (ops[0]) == MEM)
662 {
663 rtx aligned = gen_reg_rtx (SImode);
664 rtx low = gen_reg_rtx (SImode);
665 rtx addr = gen_reg_rtx (SImode);
666 rtx rotl = gen_reg_rtx (SImode);
667 rtx mask0 = gen_reg_rtx (TImode);
668 rtx mem;
669
670 emit_move_insn (addr, XEXP (ops[0], 0));
671 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
672 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
673 emit_insn (gen_negsi2 (rotl, low));
674 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
675 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
676 mem = change_address (ops[0], TImode, aligned);
677 set_mem_alias_set (mem, 0);
678 emit_move_insn (dst, mem);
679 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
680 emit_move_insn (mem, dst);
681 if (start + width > MEM_ALIGN (ops[0]))
682 {
683 rtx shl = gen_reg_rtx (SImode);
684 rtx mask1 = gen_reg_rtx (TImode);
685 rtx dst1 = gen_reg_rtx (TImode);
686 rtx mem1;
687 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
688 emit_insn (gen_shlqby_ti (mask1, mask, shl));
689 mem1 = adjust_address (mem, TImode, 16);
690 set_mem_alias_set (mem1, 0);
691 emit_move_insn (dst1, mem1);
692 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
693 emit_move_insn (mem1, dst1);
694 }
695 }
696 else
71cd778d 697 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 698}
699
700
701int
702spu_expand_block_move (rtx ops[])
703{
704 HOST_WIDE_INT bytes, align, offset;
705 rtx src, dst, sreg, dreg, target;
706 int i;
707 if (GET_CODE (ops[2]) != CONST_INT
708 || GET_CODE (ops[3]) != CONST_INT
48eb4342 709 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 710 return 0;
711
712 bytes = INTVAL (ops[2]);
713 align = INTVAL (ops[3]);
714
715 if (bytes <= 0)
716 return 1;
717
718 dst = ops[0];
719 src = ops[1];
720
721 if (align == 16)
722 {
723 for (offset = 0; offset + 16 <= bytes; offset += 16)
724 {
725 dst = adjust_address (ops[0], V16QImode, offset);
726 src = adjust_address (ops[1], V16QImode, offset);
727 emit_move_insn (dst, src);
728 }
729 if (offset < bytes)
730 {
731 rtx mask;
732 unsigned char arr[16] = { 0 };
733 for (i = 0; i < bytes - offset; i++)
734 arr[i] = 0xff;
735 dst = adjust_address (ops[0], V16QImode, offset);
736 src = adjust_address (ops[1], V16QImode, offset);
737 mask = gen_reg_rtx (V16QImode);
738 sreg = gen_reg_rtx (V16QImode);
739 dreg = gen_reg_rtx (V16QImode);
740 target = gen_reg_rtx (V16QImode);
741 emit_move_insn (mask, array_to_constant (V16QImode, arr));
742 emit_move_insn (dreg, dst);
743 emit_move_insn (sreg, src);
744 emit_insn (gen_selb (target, dreg, sreg, mask));
745 emit_move_insn (dst, target);
746 }
747 return 1;
748 }
749 return 0;
750}
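/* Illustrative example (not from the original source): a 35-byte copy with
   16-byte alignment is expanded by the code above as two full V16QImode
   moves followed by a selb that merges only the remaining 3 bytes of the
   source into the destination quadword.  */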
751
752enum spu_comp_code
753{ SPU_EQ, SPU_GT, SPU_GTU };
754
5474166e 755int spu_comp_icode[12][3] = {
756 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
757 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
758 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
759 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
760 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
761 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
762 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
763 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
764 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
765 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
766 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
767 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 768};
769
770/* Generate a compare for CODE. Return a brand-new rtx that represents
771 the result of the compare. GCC can figure this out too if we don't
 772 provide all variations of compares, but since GCC always wants to use
773 WORD_MODE, we can generate better code in most cases if we do it
774 ourselves. */
775void
776spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
777{
778 int reverse_compare = 0;
779 int reverse_test = 0;
5d70b918 780 rtx compare_result, eq_result;
781 rtx comp_rtx, eq_rtx;
644459d0 782 rtx target = operands[0];
783 enum machine_mode comp_mode;
784 enum machine_mode op_mode;
5d70b918 785 enum spu_comp_code scode, eq_code, ior_code;
644459d0 786 int index;
5d70b918 787 int eq_test = 0;
644459d0 788
789 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
790 and so on, to keep the constant in operand 1. */
791 if (GET_CODE (spu_compare_op1) == CONST_INT)
792 {
793 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
794 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
795 switch (code)
796 {
797 case GE:
798 spu_compare_op1 = GEN_INT (val);
799 code = GT;
800 break;
801 case LT:
802 spu_compare_op1 = GEN_INT (val);
803 code = LE;
804 break;
805 case GEU:
806 spu_compare_op1 = GEN_INT (val);
807 code = GTU;
808 break;
809 case LTU:
810 spu_compare_op1 = GEN_INT (val);
811 code = LEU;
812 break;
813 default:
814 break;
815 }
816 }
817
5d70b918 818 comp_mode = SImode;
819 op_mode = GET_MODE (spu_compare_op0);
820
644459d0 821 switch (code)
822 {
823 case GE:
644459d0 824 scode = SPU_GT;
07027691 825 if (HONOR_NANS (op_mode))
5d70b918 826 {
827 reverse_compare = 0;
828 reverse_test = 0;
829 eq_test = 1;
830 eq_code = SPU_EQ;
831 }
832 else
833 {
834 reverse_compare = 1;
835 reverse_test = 1;
836 }
644459d0 837 break;
838 case LE:
644459d0 839 scode = SPU_GT;
07027691 840 if (HONOR_NANS (op_mode))
5d70b918 841 {
842 reverse_compare = 1;
843 reverse_test = 0;
844 eq_test = 1;
845 eq_code = SPU_EQ;
846 }
847 else
848 {
849 reverse_compare = 0;
850 reverse_test = 1;
851 }
644459d0 852 break;
853 case LT:
854 reverse_compare = 1;
855 reverse_test = 0;
856 scode = SPU_GT;
857 break;
858 case GEU:
859 reverse_compare = 1;
860 reverse_test = 1;
861 scode = SPU_GTU;
862 break;
863 case LEU:
864 reverse_compare = 0;
865 reverse_test = 1;
866 scode = SPU_GTU;
867 break;
868 case LTU:
869 reverse_compare = 1;
870 reverse_test = 0;
871 scode = SPU_GTU;
872 break;
873 case NE:
874 reverse_compare = 0;
875 reverse_test = 1;
876 scode = SPU_EQ;
877 break;
878
879 case EQ:
880 scode = SPU_EQ;
881 break;
882 case GT:
883 scode = SPU_GT;
884 break;
885 case GTU:
886 scode = SPU_GTU;
887 break;
888 default:
889 scode = SPU_EQ;
890 break;
891 }
892
644459d0 893 switch (op_mode)
894 {
895 case QImode:
896 index = 0;
897 comp_mode = QImode;
898 break;
899 case HImode:
900 index = 1;
901 comp_mode = HImode;
902 break;
903 case SImode:
904 index = 2;
905 break;
906 case DImode:
907 index = 3;
908 break;
909 case TImode:
910 index = 4;
911 break;
912 case SFmode:
913 index = 5;
914 break;
915 case DFmode:
916 index = 6;
917 break;
918 case V16QImode:
5474166e 919 index = 7;
920 comp_mode = op_mode;
921 break;
644459d0 922 case V8HImode:
5474166e 923 index = 8;
924 comp_mode = op_mode;
925 break;
644459d0 926 case V4SImode:
5474166e 927 index = 9;
928 comp_mode = op_mode;
929 break;
644459d0 930 case V4SFmode:
5474166e 931 index = 10;
932 comp_mode = V4SImode;
933 break;
644459d0 934 case V2DFmode:
5474166e 935 index = 11;
936 comp_mode = V2DImode;
644459d0 937 break;
5474166e 938 case V2DImode:
644459d0 939 default:
940 abort ();
941 }
942
07027691 943 if (GET_MODE (spu_compare_op1) == DFmode
944 && (scode != SPU_GT && scode != SPU_EQ))
945 abort ();
644459d0 946
947 if (is_set == 0 && spu_compare_op1 == const0_rtx
948 && (GET_MODE (spu_compare_op0) == SImode
949 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
950 {
951 /* Don't need to set a register with the result when we are
952 comparing against zero and branching. */
953 reverse_test = !reverse_test;
954 compare_result = spu_compare_op0;
955 }
956 else
957 {
958 compare_result = gen_reg_rtx (comp_mode);
959
960 if (reverse_compare)
961 {
962 rtx t = spu_compare_op1;
963 spu_compare_op1 = spu_compare_op0;
964 spu_compare_op0 = t;
965 }
966
967 if (spu_comp_icode[index][scode] == 0)
968 abort ();
969
970 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
971 (spu_compare_op0, op_mode))
972 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
973 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
974 (spu_compare_op1, op_mode))
975 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
976 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
977 spu_compare_op0,
978 spu_compare_op1);
979 if (comp_rtx == 0)
980 abort ();
981 emit_insn (comp_rtx);
982
5d70b918 983 if (eq_test)
984 {
985 eq_result = gen_reg_rtx (comp_mode);
986 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
987 spu_compare_op0,
988 spu_compare_op1);
989 if (eq_rtx == 0)
990 abort ();
991 emit_insn (eq_rtx);
992 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
993 gcc_assert (ior_code != CODE_FOR_nothing);
994 emit_insn (GEN_FCN (ior_code)
995 (compare_result, compare_result, eq_result));
996 }
644459d0 997 }
998
999 if (is_set == 0)
1000 {
1001 rtx bcomp;
1002 rtx loc_ref;
1003
1004 /* We don't have branch on QI compare insns, so we convert the
1005 QI compare result to a HI result. */
1006 if (comp_mode == QImode)
1007 {
1008 rtx old_res = compare_result;
1009 compare_result = gen_reg_rtx (HImode);
1010 comp_mode = HImode;
1011 emit_insn (gen_extendqihi2 (compare_result, old_res));
1012 }
1013
1014 if (reverse_test)
1015 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1016 else
1017 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1018
1019 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1020 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1021 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1022 loc_ref, pc_rtx)));
1023 }
1024 else if (is_set == 2)
1025 {
1026 int compare_size = GET_MODE_BITSIZE (comp_mode);
1027 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1028 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1029 rtx select_mask;
1030 rtx op_t = operands[2];
1031 rtx op_f = operands[3];
1032
1033 /* The result of the comparison can be SI, HI or QI mode. Create a
1034 mask based on that result. */
1035 if (target_size > compare_size)
1036 {
1037 select_mask = gen_reg_rtx (mode);
1038 emit_insn (gen_extend_compare (select_mask, compare_result));
1039 }
1040 else if (target_size < compare_size)
1041 select_mask =
1042 gen_rtx_SUBREG (mode, compare_result,
1043 (compare_size - target_size) / BITS_PER_UNIT);
1044 else if (comp_mode != mode)
1045 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1046 else
1047 select_mask = compare_result;
1048
1049 if (GET_MODE (target) != GET_MODE (op_t)
1050 || GET_MODE (target) != GET_MODE (op_f))
1051 abort ();
1052
1053 if (reverse_test)
1054 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1055 else
1056 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1057 }
1058 else
1059 {
1060 if (reverse_test)
1061 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1062 gen_rtx_NOT (comp_mode, compare_result)));
1063 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1064 emit_insn (gen_extendhisi2 (target, compare_result));
1065 else if (GET_MODE (target) == SImode
1066 && GET_MODE (compare_result) == QImode)
1067 emit_insn (gen_extend_compare (target, compare_result));
1068 else
1069 emit_move_insn (target, compare_result);
1070 }
1071}
1072
1073HOST_WIDE_INT
1074const_double_to_hwint (rtx x)
1075{
1076 HOST_WIDE_INT val;
1077 REAL_VALUE_TYPE rv;
1078 if (GET_MODE (x) == SFmode)
1079 {
1080 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1081 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1082 }
1083 else if (GET_MODE (x) == DFmode)
1084 {
1085 long l[2];
1086 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1087 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1088 val = l[0];
1089 val = (val << 32) | (l[1] & 0xffffffff);
1090 }
1091 else
1092 abort ();
1093 return val;
1094}
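/* Example (illustrative): for an SFmode CONST_DOUBLE of 1.0 this returns the
   IEEE single-precision pattern 0x3f800000; for DFmode the two words from
   REAL_VALUE_TO_TARGET_DOUBLE are packed with l[0] in the high half.  */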
1095
1096rtx
1097hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1098{
1099 long tv[2];
1100 REAL_VALUE_TYPE rv;
1101 gcc_assert (mode == SFmode || mode == DFmode);
1102
1103 if (mode == SFmode)
1104 tv[0] = (v << 32) >> 32;
1105 else if (mode == DFmode)
1106 {
1107 tv[1] = (v << 32) >> 32;
1108 tv[0] = v >> 32;
1109 }
1110 real_from_target (&rv, tv, mode);
1111 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1112}
1113
1114void
1115print_operand_address (FILE * file, register rtx addr)
1116{
1117 rtx reg;
1118 rtx offset;
1119
e04cf423 1120 if (GET_CODE (addr) == AND
1121 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1122 && INTVAL (XEXP (addr, 1)) == -16)
1123 addr = XEXP (addr, 0);
1124
644459d0 1125 switch (GET_CODE (addr))
1126 {
1127 case REG:
1128 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1129 break;
1130
1131 case PLUS:
1132 reg = XEXP (addr, 0);
1133 offset = XEXP (addr, 1);
1134 if (GET_CODE (offset) == REG)
1135 {
1136 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1137 reg_names[REGNO (offset)]);
1138 }
1139 else if (GET_CODE (offset) == CONST_INT)
1140 {
1141 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1142 INTVAL (offset), reg_names[REGNO (reg)]);
1143 }
1144 else
1145 abort ();
1146 break;
1147
1148 case CONST:
1149 case LABEL_REF:
1150 case SYMBOL_REF:
1151 case CONST_INT:
1152 output_addr_const (file, addr);
1153 break;
1154
1155 default:
1156 debug_rtx (addr);
1157 abort ();
1158 }
1159}
1160
1161void
1162print_operand (FILE * file, rtx x, int code)
1163{
1164 enum machine_mode mode = GET_MODE (x);
1165 HOST_WIDE_INT val;
1166 unsigned char arr[16];
1167 int xcode = GET_CODE (x);
dea01258 1168 int i, info;
644459d0 1169 if (GET_MODE (x) == VOIDmode)
1170 switch (code)
1171 {
644459d0 1172 case 'L': /* 128 bits, signed */
1173 case 'm': /* 128 bits, signed */
1174 case 'T': /* 128 bits, signed */
1175 case 't': /* 128 bits, signed */
1176 mode = TImode;
1177 break;
644459d0 1178 case 'K': /* 64 bits, signed */
1179 case 'k': /* 64 bits, signed */
1180 case 'D': /* 64 bits, signed */
1181 case 'd': /* 64 bits, signed */
1182 mode = DImode;
1183 break;
644459d0 1184 case 'J': /* 32 bits, signed */
1185 case 'j': /* 32 bits, signed */
1186 case 's': /* 32 bits, signed */
1187 case 'S': /* 32 bits, signed */
1188 mode = SImode;
1189 break;
1190 }
1191 switch (code)
1192 {
1193
1194 case 'j': /* 32 bits, signed */
1195 case 'k': /* 64 bits, signed */
1196 case 'm': /* 128 bits, signed */
1197 if (xcode == CONST_INT
1198 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1199 {
1200 gcc_assert (logical_immediate_p (x, mode));
1201 constant_to_array (mode, x, arr);
1202 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1203 val = trunc_int_for_mode (val, SImode);
1204 switch (which_logical_immediate (val))
1205 {
1206 case SPU_ORI:
1207 break;
1208 case SPU_ORHI:
1209 fprintf (file, "h");
1210 break;
1211 case SPU_ORBI:
1212 fprintf (file, "b");
1213 break;
1214 default:
1215 gcc_unreachable();
1216 }
1217 }
1218 else
1219 gcc_unreachable();
1220 return;
1221
1222 case 'J': /* 32 bits, signed */
1223 case 'K': /* 64 bits, signed */
1224 case 'L': /* 128 bits, signed */
1225 if (xcode == CONST_INT
1226 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1227 {
1228 gcc_assert (logical_immediate_p (x, mode)
1229 || iohl_immediate_p (x, mode));
1230 constant_to_array (mode, x, arr);
1231 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1232 val = trunc_int_for_mode (val, SImode);
1233 switch (which_logical_immediate (val))
1234 {
1235 case SPU_ORI:
1236 case SPU_IOHL:
1237 break;
1238 case SPU_ORHI:
1239 val = trunc_int_for_mode (val, HImode);
1240 break;
1241 case SPU_ORBI:
1242 val = trunc_int_for_mode (val, QImode);
1243 break;
1244 default:
1245 gcc_unreachable();
1246 }
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1248 }
1249 else
1250 gcc_unreachable();
1251 return;
1252
1253 case 't': /* 128 bits, signed */
1254 case 'd': /* 64 bits, signed */
1255 case 's': /* 32 bits, signed */
dea01258 1256 if (CONSTANT_P (x))
644459d0 1257 {
dea01258 1258 enum immediate_class c = classify_immediate (x, mode);
1259 switch (c)
1260 {
1261 case IC_IL1:
1262 constant_to_array (mode, x, arr);
1263 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1264 val = trunc_int_for_mode (val, SImode);
1265 switch (which_immediate_load (val))
1266 {
1267 case SPU_IL:
1268 break;
1269 case SPU_ILA:
1270 fprintf (file, "a");
1271 break;
1272 case SPU_ILH:
1273 fprintf (file, "h");
1274 break;
1275 case SPU_ILHU:
1276 fprintf (file, "hu");
1277 break;
1278 default:
1279 gcc_unreachable ();
1280 }
1281 break;
1282 case IC_CPAT:
1283 constant_to_array (mode, x, arr);
1284 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1285 if (info == 1)
1286 fprintf (file, "b");
1287 else if (info == 2)
1288 fprintf (file, "h");
1289 else if (info == 4)
1290 fprintf (file, "w");
1291 else if (info == 8)
1292 fprintf (file, "d");
1293 break;
1294 case IC_IL1s:
1295 if (xcode == CONST_VECTOR)
1296 {
1297 x = CONST_VECTOR_ELT (x, 0);
1298 xcode = GET_CODE (x);
1299 }
1300 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1301 fprintf (file, "a");
1302 else if (xcode == HIGH)
1303 fprintf (file, "hu");
1304 break;
1305 case IC_FSMBI:
5df189be 1306 case IC_FSMBI2:
dea01258 1307 case IC_IL2:
1308 case IC_IL2s:
1309 case IC_POOL:
1310 abort ();
1311 }
644459d0 1312 }
644459d0 1313 else
1314 gcc_unreachable ();
1315 return;
1316
1317 case 'T': /* 128 bits, signed */
1318 case 'D': /* 64 bits, signed */
1319 case 'S': /* 32 bits, signed */
dea01258 1320 if (CONSTANT_P (x))
644459d0 1321 {
dea01258 1322 enum immediate_class c = classify_immediate (x, mode);
1323 switch (c)
644459d0 1324 {
dea01258 1325 case IC_IL1:
1326 constant_to_array (mode, x, arr);
1327 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1328 val = trunc_int_for_mode (val, SImode);
1329 switch (which_immediate_load (val))
1330 {
1331 case SPU_IL:
1332 case SPU_ILA:
1333 break;
1334 case SPU_ILH:
1335 case SPU_ILHU:
1336 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1337 break;
1338 default:
1339 gcc_unreachable ();
1340 }
1341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1342 break;
1343 case IC_FSMBI:
1344 constant_to_array (mode, x, arr);
1345 val = 0;
1346 for (i = 0; i < 16; i++)
1347 {
1348 val <<= 1;
1349 val |= arr[i] & 1;
1350 }
1351 print_operand (file, GEN_INT (val), 0);
1352 break;
1353 case IC_CPAT:
1354 constant_to_array (mode, x, arr);
1355 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1357 break;
dea01258 1358 case IC_IL1s:
dea01258 1359 if (xcode == HIGH)
5df189be 1360 x = XEXP (x, 0);
1361 if (GET_CODE (x) == CONST_VECTOR)
1362 x = CONST_VECTOR_ELT (x, 0);
1363 output_addr_const (file, x);
1364 if (xcode == HIGH)
1365 fprintf (file, "@h");
644459d0 1366 break;
dea01258 1367 case IC_IL2:
1368 case IC_IL2s:
5df189be 1369 case IC_FSMBI2:
dea01258 1370 case IC_POOL:
1371 abort ();
644459d0 1372 }
c8befdb9 1373 }
644459d0 1374 else
1375 gcc_unreachable ();
1376 return;
1377
644459d0 1378 case 'C':
1379 if (xcode == CONST_INT)
1380 {
 1381 /* Only the 4 least significant bits are relevant for generating
1382 control word instructions. */
1383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1384 return;
1385 }
1386 break;
1387
1388 case 'M': /* print code for c*d */
1389 if (GET_CODE (x) == CONST_INT)
1390 switch (INTVAL (x))
1391 {
1392 case 1:
1393 fprintf (file, "b");
1394 break;
1395 case 2:
1396 fprintf (file, "h");
1397 break;
1398 case 4:
1399 fprintf (file, "w");
1400 break;
1401 case 8:
1402 fprintf (file, "d");
1403 break;
1404 default:
1405 gcc_unreachable();
1406 }
1407 else
1408 gcc_unreachable();
1409 return;
1410
1411 case 'N': /* Negate the operand */
1412 if (xcode == CONST_INT)
1413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1414 else if (xcode == CONST_VECTOR)
1415 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1416 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1417 return;
1418
1419 case 'I': /* enable/disable interrupts */
1420 if (xcode == CONST_INT)
1421 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1422 return;
1423
1424 case 'b': /* branch modifiers */
1425 if (xcode == REG)
1426 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1427 else if (COMPARISON_P (x))
1428 fprintf (file, "%s", xcode == NE ? "n" : "");
1429 return;
1430
1431 case 'i': /* indirect call */
1432 if (xcode == MEM)
1433 {
1434 if (GET_CODE (XEXP (x, 0)) == REG)
1435 /* Used in indirect function calls. */
1436 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1437 else
1438 output_address (XEXP (x, 0));
1439 }
1440 return;
1441
1442 case 'p': /* load/store */
1443 if (xcode == MEM)
1444 {
1445 x = XEXP (x, 0);
1446 xcode = GET_CODE (x);
1447 }
e04cf423 1448 if (xcode == AND)
1449 {
1450 x = XEXP (x, 0);
1451 xcode = GET_CODE (x);
1452 }
644459d0 1453 if (xcode == REG)
1454 fprintf (file, "d");
1455 else if (xcode == CONST_INT)
1456 fprintf (file, "a");
1457 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1458 fprintf (file, "r");
1459 else if (xcode == PLUS || xcode == LO_SUM)
1460 {
1461 if (GET_CODE (XEXP (x, 1)) == REG)
1462 fprintf (file, "x");
1463 else
1464 fprintf (file, "d");
1465 }
1466 return;
1467
5df189be 1468 case 'e':
1469 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1470 val &= 0x7;
1471 output_addr_const (file, GEN_INT (val));
1472 return;
1473
1474 case 'f':
1475 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1476 val &= 0x1f;
1477 output_addr_const (file, GEN_INT (val));
1478 return;
1479
1480 case 'g':
1481 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1482 val &= 0x3f;
1483 output_addr_const (file, GEN_INT (val));
1484 return;
1485
1486 case 'h':
1487 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1488 val = (val >> 3) & 0x1f;
1489 output_addr_const (file, GEN_INT (val));
1490 return;
1491
1492 case 'E':
1493 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1494 val = -val;
1495 val &= 0x7;
1496 output_addr_const (file, GEN_INT (val));
1497 return;
1498
1499 case 'F':
1500 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1501 val = -val;
1502 val &= 0x1f;
1503 output_addr_const (file, GEN_INT (val));
1504 return;
1505
1506 case 'G':
1507 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1508 val = -val;
1509 val &= 0x3f;
1510 output_addr_const (file, GEN_INT (val));
1511 return;
1512
1513 case 'H':
1514 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1515 val = -(val & -8ll);
1516 val = (val >> 3) & 0x1f;
1517 output_addr_const (file, GEN_INT (val));
1518 return;
1519
644459d0 1520 case 0:
1521 if (xcode == REG)
1522 fprintf (file, "%s", reg_names[REGNO (x)]);
1523 else if (xcode == MEM)
1524 output_address (XEXP (x, 0));
1525 else if (xcode == CONST_VECTOR)
dea01258 1526 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1527 else
1528 output_addr_const (file, x);
1529 return;
1530
f6a0d06f 1531 /* unused letters
5df189be 1532 o qr uvw yz
1533 AB OPQR UVWXYZ */
644459d0 1534 default:
1535 output_operand_lossage ("invalid %%xn code");
1536 }
1537 gcc_unreachable ();
1538}
1539
1540extern char call_used_regs[];
644459d0 1541
1542/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1543 caller saved register. For leaf functions it is more efficient to
1544 use a volatile register because we won't need to save and restore the
1545 pic register. This routine is only valid after register allocation
1546 is completed, so we can pick an unused register. */
1547static rtx
1548get_pic_reg (void)
1549{
1550 rtx pic_reg = pic_offset_table_rtx;
1551 if (!reload_completed && !reload_in_progress)
1552 abort ();
1553 return pic_reg;
1554}
1555
5df189be 1556/* Split constant addresses to handle cases that are too large.
1557 Add in the pic register when in PIC mode.
1558 Split immediates that require more than 1 instruction. */
dea01258 1559int
1560spu_split_immediate (rtx * ops)
c8befdb9 1561{
dea01258 1562 enum machine_mode mode = GET_MODE (ops[0]);
1563 enum immediate_class c = classify_immediate (ops[1], mode);
1564
1565 switch (c)
c8befdb9 1566 {
dea01258 1567 case IC_IL2:
1568 {
1569 unsigned char arrhi[16];
1570 unsigned char arrlo[16];
98bbec1e 1571 rtx to, temp, hi, lo;
dea01258 1572 int i;
98bbec1e 1573 enum machine_mode imode = mode;
1574 /* We need to do reals as ints because the constant used in the
1575 IOR might not be a legitimate real constant. */
1576 imode = int_mode_for_mode (mode);
dea01258 1577 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1578 if (imode != mode)
1579 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1580 else
1581 to = ops[0];
1582 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1583 for (i = 0; i < 16; i += 4)
1584 {
1585 arrlo[i + 2] = arrhi[i + 2];
1586 arrlo[i + 3] = arrhi[i + 3];
1587 arrlo[i + 0] = arrlo[i + 1] = 0;
1588 arrhi[i + 2] = arrhi[i + 3] = 0;
1589 }
98bbec1e 1590 hi = array_to_constant (imode, arrhi);
1591 lo = array_to_constant (imode, arrlo);
1592 emit_move_insn (temp, hi);
dea01258 1593 emit_insn (gen_rtx_SET
98bbec1e 1594 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1595 return 1;
1596 }
5df189be 1597 case IC_FSMBI2:
1598 {
1599 unsigned char arr_fsmbi[16];
1600 unsigned char arr_andbi[16];
1601 rtx to, reg_fsmbi, reg_and;
1602 int i;
1603 enum machine_mode imode = mode;
1604 /* We need to do reals as ints because the constant used in the
1605 * AND might not be a legitimate real constant. */
1606 imode = int_mode_for_mode (mode);
1607 constant_to_array (mode, ops[1], arr_fsmbi);
1608 if (imode != mode)
1609 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1610 else
1611 to = ops[0];
1612 for (i = 0; i < 16; i++)
1613 if (arr_fsmbi[i] != 0)
1614 {
1615 arr_andbi[0] = arr_fsmbi[i];
1616 arr_fsmbi[i] = 0xff;
1617 }
1618 for (i = 1; i < 16; i++)
1619 arr_andbi[i] = arr_andbi[0];
1620 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1621 reg_and = array_to_constant (imode, arr_andbi);
1622 emit_move_insn (to, reg_fsmbi);
1623 emit_insn (gen_rtx_SET
1624 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1625 return 1;
1626 }
dea01258 1627 case IC_POOL:
1628 if (reload_in_progress || reload_completed)
1629 {
1630 rtx mem = force_const_mem (mode, ops[1]);
1631 if (TARGET_LARGE_MEM)
1632 {
1633 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1634 emit_move_insn (addr, XEXP (mem, 0));
1635 mem = replace_equiv_address (mem, addr);
1636 }
1637 emit_move_insn (ops[0], mem);
1638 return 1;
1639 }
1640 break;
1641 case IC_IL1s:
1642 case IC_IL2s:
1643 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1644 {
1645 if (c == IC_IL2s)
1646 {
5df189be 1647 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1648 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1649 }
1650 else if (flag_pic)
1651 emit_insn (gen_pic (ops[0], ops[1]));
1652 if (flag_pic)
1653 {
1654 rtx pic_reg = get_pic_reg ();
1655 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1656 crtl->uses_pic_offset_table = 1;
dea01258 1657 }
1658 return flag_pic || c == IC_IL2s;
1659 }
1660 break;
1661 case IC_IL1:
1662 case IC_FSMBI:
1663 case IC_CPAT:
1664 break;
c8befdb9 1665 }
dea01258 1666 return 0;
c8befdb9 1667}
1668
644459d0 1669/* SAVING is TRUE when we are generating the actual load and store
1670 instructions for REGNO. When determining the size of the stack
 1671 needed for saving registers we must allocate enough space for the
1672 worst case, because we don't always have the information early enough
1673 to not allocate it. But we can at least eliminate the actual loads
1674 and stores during the prologue/epilogue. */
1675static int
1676need_to_save_reg (int regno, int saving)
1677{
3072d30e 1678 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1679 return 1;
1680 if (flag_pic
1681 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1682 && (!saving || crtl->uses_pic_offset_table)
644459d0 1683 && (!saving
3072d30e 1684 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1685 return 1;
1686 return 0;
1687}
1688
1689/* This function is only correct starting with local register
1690 allocation */
1691int
1692spu_saved_regs_size (void)
1693{
1694 int reg_save_size = 0;
1695 int regno;
1696
1697 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1698 if (need_to_save_reg (regno, 0))
1699 reg_save_size += 0x10;
1700 return reg_save_size;
1701}
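/* Note (derived from the code above and frame_emit_store below): every saved
   register occupies a full 16-byte slot because saves and restores are done
   as V4SImode quadword accesses.  */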
1702
1703static rtx
1704frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1705{
1706 rtx reg = gen_rtx_REG (V4SImode, regno);
1707 rtx mem =
1708 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1709 return emit_insn (gen_movv4si (mem, reg));
1710}
1711
1712static rtx
1713frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1714{
1715 rtx reg = gen_rtx_REG (V4SImode, regno);
1716 rtx mem =
1717 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1718 return emit_insn (gen_movv4si (reg, mem));
1719}
1720
1721/* This happens after reload, so we need to expand it. */
1722static rtx
1723frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1724{
1725 rtx insn;
1726 if (satisfies_constraint_K (GEN_INT (imm)))
1727 {
1728 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1729 }
1730 else
1731 {
3072d30e 1732 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1733 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1734 if (REGNO (src) == REGNO (scratch))
1735 abort ();
1736 }
644459d0 1737 return insn;
1738}
1739
1740/* Return nonzero if this function is known to have a null epilogue. */
1741
1742int
1743direct_return (void)
1744{
1745 if (reload_completed)
1746 {
1747 if (cfun->static_chain_decl == 0
1748 && (spu_saved_regs_size ()
1749 + get_frame_size ()
abe32cce 1750 + crtl->outgoing_args_size
1751 + crtl->args.pretend_args_size == 0)
644459d0 1752 && current_function_is_leaf)
1753 return 1;
1754 }
1755 return 0;
1756}
1757
1758/*
1759 The stack frame looks like this:
1760 +-------------+
1761 | incoming |
1762 AP | args |
1763 +-------------+
1764 | $lr save |
1765 +-------------+
1766 prev SP | back chain |
1767 +-------------+
1768 | var args |
abe32cce 1769 | reg save | crtl->args.pretend_args_size bytes
644459d0 1770 +-------------+
1771 | ... |
1772 | saved regs | spu_saved_regs_size() bytes
1773 +-------------+
1774 | ... |
1775 FP | vars | get_frame_size() bytes
1776 +-------------+
1777 | ... |
1778 | outgoing |
abe32cce 1779 | args | crtl->outgoing_args_size bytes
644459d0 1780 +-------------+
1781 | $lr of next |
1782 | frame |
1783 +-------------+
1784 SP | back chain |
1785 +-------------+
1786
1787*/
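/* Illustrative sizing (derived from the code below): total_size is
   get_frame_size () + spu_saved_regs_size () + crtl->outgoing_args_size
   + crtl->args.pretend_args_size, plus STACK_POINTER_OFFSET whenever the
   function is not a leaf, calls alloca, or already needs a nonzero frame.  */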
1788void
1789spu_expand_prologue (void)
1790{
1791 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1792 HOST_WIDE_INT total_size;
1793 HOST_WIDE_INT saved_regs_size;
1794 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1795 rtx scratch_reg_0, scratch_reg_1;
1796 rtx insn, real;
1797
1798 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1799 the "toplevel" insn chain. */
1800 emit_note (NOTE_INSN_DELETED);
1801
1802 if (flag_pic && optimize == 0)
18d50ae6 1803 crtl->uses_pic_offset_table = 1;
644459d0 1804
1805 if (spu_naked_function_p (current_function_decl))
1806 return;
1807
1808 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1809 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1810
1811 saved_regs_size = spu_saved_regs_size ();
1812 total_size = size + saved_regs_size
abe32cce 1813 + crtl->outgoing_args_size
1814 + crtl->args.pretend_args_size;
644459d0 1815
1816 if (!current_function_is_leaf
18d50ae6 1817 || cfun->calls_alloca || total_size > 0)
644459d0 1818 total_size += STACK_POINTER_OFFSET;
1819
1820 /* Save this first because code after this might use the link
1821 register as a scratch register. */
1822 if (!current_function_is_leaf)
1823 {
1824 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1825 RTX_FRAME_RELATED_P (insn) = 1;
1826 }
1827
1828 if (total_size > 0)
1829 {
abe32cce 1830 offset = -crtl->args.pretend_args_size;
644459d0 1831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1832 if (need_to_save_reg (regno, 1))
1833 {
1834 offset -= 16;
1835 insn = frame_emit_store (regno, sp_reg, offset);
1836 RTX_FRAME_RELATED_P (insn) = 1;
1837 }
1838 }
1839
18d50ae6 1840 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1841 {
1842 rtx pic_reg = get_pic_reg ();
1843 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1844 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1845 }
1846
1847 if (total_size > 0)
1848 {
1849 if (flag_stack_check)
1850 {
d819917f 1851 /* We compare against total_size-1 because
644459d0 1852 ($sp >= total_size) <=> ($sp > total_size-1) */
1853 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1854 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1855 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1856 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1857 {
1858 emit_move_insn (scratch_v4si, size_v4si);
1859 size_v4si = scratch_v4si;
1860 }
1861 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1862 emit_insn (gen_vec_extractv4si
1863 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1864 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1865 }
1866
1867 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1868 the value of the previous $sp because we save it as the back
1869 chain. */
1870 if (total_size <= 2000)
1871 {
1872 /* In this case we save the back chain first. */
1873 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1874 insn =
1875 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1876 }
1877 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1878 {
1879 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1880 insn =
1881 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1882 }
1883 else
1884 {
1885 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1886 insn =
1887 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1888 }
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1891 REG_NOTES (insn) =
1892 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1893
1894 if (total_size > 2000)
1895 {
1896 /* Save the back chain ptr */
1897 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1898 }
1899
1900 if (frame_pointer_needed)
1901 {
1902 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1903 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1904 + crtl->outgoing_args_size;
644459d0 1905 /* Set the new frame_pointer */
d8dfeb55 1906 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1907 RTX_FRAME_RELATED_P (insn) = 1;
1908 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1909 REG_NOTES (insn) =
1910 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1911 real, REG_NOTES (insn));
5df189be 1912 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1913 }
1914 }
1915
1916 emit_note (NOTE_INSN_DELETED);
1917}
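
/* Illustrative sketch, not part of spu.c: how the prologue above arrives at
   the amount it subtracts from $sp.  The example_* names are hypothetical;
   the arguments mirror get_frame_size (), spu_saved_regs_size () and the
   crtl fields used above, and SP_OFFSET stands for STACK_POINTER_OFFSET.  */

static long
example_total_frame_size (long frame_size, long saved_regs_size,
                          long outgoing_args_size, long pretend_args_size,
                          int is_leaf, int calls_alloca, long sp_offset)
{
  long total = frame_size + saved_regs_size
               + outgoing_args_size + pretend_args_size;

  /* Leaf functions with an otherwise empty frame and no alloca skip the
     fixed SP_OFFSET overhead; everything else pays it.  */
  if (!is_leaf || calls_alloca || total > 0)
    total += sp_offset;
  return total;
}
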
1918
1919void
1920spu_expand_epilogue (bool sibcall_p)
1921{
1922 int size = get_frame_size (), offset, regno;
1923 HOST_WIDE_INT saved_regs_size, total_size;
1924 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1925 rtx jump, scratch_reg_0;
1926
1927 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1928 the "toplevel" insn chain. */
1929 emit_note (NOTE_INSN_DELETED);
1930
1931 if (spu_naked_function_p (current_function_decl))
1932 return;
1933
1934 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1935
1936 saved_regs_size = spu_saved_regs_size ();
1937 total_size = size + saved_regs_size
abe32cce 1938 + crtl->outgoing_args_size
1939 + crtl->args.pretend_args_size;
644459d0 1940
1941 if (!current_function_is_leaf
18d50ae6 1942 || cfun->calls_alloca || total_size > 0)
644459d0 1943 total_size += STACK_POINTER_OFFSET;
1944
1945 if (total_size > 0)
1946 {
18d50ae6 1947 if (cfun->calls_alloca)
644459d0 1948 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1949 else
1950 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1951
1952
1953 if (saved_regs_size > 0)
1954 {
abe32cce 1955 offset = -crtl->args.pretend_args_size;
644459d0 1956 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1957 if (need_to_save_reg (regno, 1))
1958 {
1959 offset -= 0x10;
1960 frame_emit_load (regno, sp_reg, offset);
1961 }
1962 }
1963 }
1964
1965 if (!current_function_is_leaf)
1966 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1967
1968 if (!sibcall_p)
1969 {
18b42941 1970 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1971 jump = emit_jump_insn (gen__return ());
1972 emit_barrier_after (jump);
1973 }
1974
1975 emit_note (NOTE_INSN_DELETED);
1976}
1977
1978rtx
1979spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1980{
1981 if (count != 0)
1982 return 0;
1983 /* This is inefficient because it ends up copying to a save-register
1984 which then gets saved even though $lr has already been saved. But
1985 it does generate better code for leaf functions and we don't need
1986 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1987 used for __builtin_return_address anyway, so maybe we don't care if
1988 it's inefficient. */
1989 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1990}
1991\f
1992
1993/* Given VAL, generate a constant appropriate for MODE.
1994 If MODE is a vector mode, every element will be VAL.
1995 For TImode, VAL will be zero extended to 128 bits. */
1996rtx
1997spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1998{
1999 rtx inner;
2000 rtvec v;
2001 int units, i;
2002
2003 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2004 || GET_MODE_CLASS (mode) == MODE_FLOAT
2005 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2006 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2007
2008 if (GET_MODE_CLASS (mode) == MODE_INT)
2009 return immed_double_const (val, 0, mode);
2010
 2011 	  /* VAL is the bit representation of the float.  */
2012 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2013 return hwint_to_const_double (mode, val);
2014
2015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2016 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2017 else
2018 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2019
2020 units = GET_MODE_NUNITS (mode);
2021
2022 v = rtvec_alloc (units);
2023
2024 for (i = 0; i < units; ++i)
2025 RTVEC_ELT (v, i) = inner;
2026
2027 return gen_rtx_CONST_VECTOR (mode, v);
2028}
644459d0 2029
5474166e 2030/* Create a MODE vector constant from 4 ints. */
2031rtx
2032spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2033{
2034 unsigned char arr[16];
2035 arr[0] = (a >> 24) & 0xff;
2036 arr[1] = (a >> 16) & 0xff;
2037 arr[2] = (a >> 8) & 0xff;
2038 arr[3] = (a >> 0) & 0xff;
2039 arr[4] = (b >> 24) & 0xff;
2040 arr[5] = (b >> 16) & 0xff;
2041 arr[6] = (b >> 8) & 0xff;
2042 arr[7] = (b >> 0) & 0xff;
2043 arr[8] = (c >> 24) & 0xff;
2044 arr[9] = (c >> 16) & 0xff;
2045 arr[10] = (c >> 8) & 0xff;
2046 arr[11] = (c >> 0) & 0xff;
2047 arr[12] = (d >> 24) & 0xff;
2048 arr[13] = (d >> 16) & 0xff;
2049 arr[14] = (d >> 8) & 0xff;
2050 arr[15] = (d >> 0) & 0xff;
2051 return array_to_constant(mode, arr);
2052}
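
/* Illustrative sketch, not part of spu.c: the byte layout produced above.
   Each of the four 32-bit arguments is stored most-significant byte first
   in its own 4-byte slot of the 16-byte array that array_to_constant
   consumes.  The example_* names are hypothetical; the cast to unsigned
   just keeps the shifts well defined in this standalone version.  */

#include <assert.h>

static void
example_pack_v4si (unsigned char arr[16], int a, int b, int c, int d)
{
  int vals[4] = { a, b, c, d };
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      /* Element i occupies bytes 4*i .. 4*i+3, big-endian.  */
      arr[4 * i + j] = ((unsigned int) vals[i] >> (24 - 8 * j)) & 0xff;
}

int
main (void)
{
  unsigned char arr[16];
  example_pack_v4si (arr, 0x01020304, 0, 0, -1);
  assert (arr[0] == 0x01 && arr[3] == 0x04);   /* first element */
  assert (arr[12] == 0xff && arr[15] == 0xff); /* -1 is all ones */
  return 0;
}
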
5a976006 2053\f
 2054 /* Branch hint support.  */
5474166e 2055
644459d0 2056/* An array of these is used to propagate hints to predecessor blocks. */
2057struct spu_bb_info
2058{
5a976006 2059 rtx prop_jump; /* propagated from another block */
2060 int bb_index; /* the original block. */
644459d0 2061};
5a976006 2062static struct spu_bb_info *spu_bb_info;
644459d0 2063
5a976006 2064#define STOP_HINT_P(INSN) \
2065 (GET_CODE(INSN) == CALL_INSN \
2066 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2067 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2068
2069/* 1 when RTX is a hinted branch or its target. We keep track of
2070 what has been hinted so the safe-hint code can test it easily. */
2071#define HINTED_P(RTX) \
2072 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2073
2074/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2075#define SCHED_ON_EVEN_P(RTX) \
2076 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2077
2078/* Emit a nop for INSN such that the two will dual issue. This assumes
2079 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2080 We check for TImode to handle a MULTI1 insn which has dual issued its
2081 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2082 ADDR_VEC insns. */
2083static void
2084emit_nop_for_insn (rtx insn)
644459d0 2085{
5a976006 2086 int p;
2087 rtx new_insn;
2088 p = get_pipe (insn);
2089 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 new_insn = emit_insn_after (gen_lnop (), insn);
2091 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2092 {
5a976006 2093 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2094 PUT_MODE (new_insn, TImode);
2095 PUT_MODE (insn, VOIDmode);
2096 }
2097 else
2098 new_insn = emit_insn_after (gen_lnop (), insn);
2099 recog_memoized (new_insn);
2100}
2101
2102/* Insert nops in basic blocks to meet dual issue alignment
2103 requirements. Also make sure hbrp and hint instructions are at least
2104 one cycle apart, possibly inserting a nop. */
2105static void
2106pad_bb(void)
2107{
2108 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2109 int length;
2110 int addr;
2111
2112 /* This sets up INSN_ADDRESSES. */
2113 shorten_branches (get_insns ());
2114
2115 /* Keep track of length added by nops. */
2116 length = 0;
2117
2118 prev_insn = 0;
2119 insn = get_insns ();
2120 if (!active_insn_p (insn))
2121 insn = next_active_insn (insn);
2122 for (; insn; insn = next_insn)
2123 {
2124 next_insn = next_active_insn (insn);
2125 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2126 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2127 {
5a976006 2128 if (hbr_insn)
2129 {
2130 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2131 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2132 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2133 || (a1 - a0 == 4))
2134 {
2135 prev_insn = emit_insn_before (gen_lnop (), insn);
2136 PUT_MODE (prev_insn, GET_MODE (insn));
2137 PUT_MODE (insn, TImode);
2138 length += 4;
2139 }
2140 }
2141 hbr_insn = insn;
2142 }
2143 if (INSN_CODE (insn) == CODE_FOR_blockage)
2144 {
2145 if (GET_MODE (insn) == TImode)
2146 PUT_MODE (next_insn, TImode);
2147 insn = next_insn;
2148 next_insn = next_active_insn (insn);
2149 }
2150 addr = INSN_ADDRESSES (INSN_UID (insn));
2151 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2152 {
2153 if (((addr + length) & 7) != 0)
2154 {
2155 emit_nop_for_insn (prev_insn);
2156 length += 4;
2157 }
644459d0 2158 }
5a976006 2159 else if (GET_MODE (insn) == TImode
2160 && ((next_insn && GET_MODE (next_insn) != TImode)
2161 || get_attr_type (insn) == TYPE_MULTI0)
2162 && ((addr + length) & 7) != 0)
2163 {
2164 /* prev_insn will always be set because the first insn is
2165 always 8-byte aligned. */
2166 emit_nop_for_insn (prev_insn);
2167 length += 4;
2168 }
2169 prev_insn = insn;
644459d0 2170 }
644459d0 2171}
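
/* Illustrative sketch, not part of spu.c: the alignment rule pad_bb
   enforces.  Every SPU insn is 4 bytes, and an insn that must start an
   issue pair (SCHED_ON_EVEN_P, or a TImode group leader) has to sit on an
   8-byte boundary, so a single 4-byte nop in front of a misaligned insn is
   always enough.  The example_* name is hypothetical; EFFECTIVE_ADDR is
   the insn's original address plus the nop bytes already inserted.  */

static int
example_nop_needed_before (int effective_addr)
{
  /* (addr & 7) != 0 means the insn would land in the second (odd) slot
     of the fetch pair; one 4-byte nop moves it to the next even slot.  */
  return (effective_addr & 7) != 0;
}
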
2172
5a976006 2173\f
2174/* Routines for branch hints. */
2175
644459d0 2176static void
5a976006 2177spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2178 int distance, sbitmap blocks)
644459d0 2179{
5a976006 2180 rtx branch_label = 0;
2181 rtx hint;
2182 rtx insn;
2183 rtx table;
644459d0 2184
2185 if (before == 0 || branch == 0 || target == 0)
2186 return;
2187
5a976006 2188 	  /* While scheduling we require hints to be no further than 600 bytes
 2189 	     from the branch, so we need to enforce that here too.  */
644459d0 2190 if (distance > 600)
2191 return;
2192
5a976006 2193 	  /* If BEFORE is a basic block note, insert the hint after the note.  */
2194 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2195 before = NEXT_INSN (before);
644459d0 2196
2197 branch_label = gen_label_rtx ();
2198 LABEL_NUSES (branch_label)++;
2199 LABEL_PRESERVE_P (branch_label) = 1;
2200 insn = emit_label_before (branch_label, branch);
2201 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2202 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2203
2204 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2205 recog_memoized (hint);
2206 HINTED_P (branch) = 1;
644459d0 2207
5a976006 2208 if (GET_CODE (target) == LABEL_REF)
2209 HINTED_P (XEXP (target, 0)) = 1;
2210 else if (tablejump_p (branch, 0, &table))
644459d0 2211 {
5a976006 2212 rtvec vec;
2213 int j;
2214 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2215 vec = XVEC (PATTERN (table), 0);
2216 else
2217 vec = XVEC (PATTERN (table), 1);
2218 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2219 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2220 }
5a976006 2221
2222 if (distance >= 588)
644459d0 2223 {
5a976006 2224 /* Make sure the hint isn't scheduled any earlier than this point,
 2225 	     which could make it too far for the branch offset to fit.  */
2226 recog_memoized (emit_insn_before (gen_blockage (), hint));
2227 }
2228 else if (distance <= 8 * 4)
2229 {
2230 /* To guarantee at least 8 insns between the hint and branch we
2231 insert nops. */
2232 int d;
2233 for (d = distance; d < 8 * 4; d += 4)
2234 {
2235 insn =
2236 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2237 recog_memoized (insn);
2238 }
2239
2240 /* Make sure any nops inserted aren't scheduled before the hint. */
2241 recog_memoized (emit_insn_after (gen_blockage (), hint));
2242
2243 /* Make sure any nops inserted aren't scheduled after the call. */
2244 if (CALL_P (branch) && distance < 8 * 4)
2245 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2246 }
644459d0 2247}
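
/* Illustrative sketch, not part of spu.c: the distance thresholds the
   function above applies, with DIST being the byte distance from the hint
   insertion point to the branch.  All names are hypothetical.  */

enum example_hint_action
{
  EXAMPLE_NO_HINT,       /* dist > 600: too far away, emit nothing       */
  EXAMPLE_HINT_BLOCKAGE, /* 588 <= dist <= 600: blockage pins the hint   */
  EXAMPLE_HINT_PAD_NOPS, /* dist <= 32: pad to 8 insns before the branch */
  EXAMPLE_HINT_PLAIN     /* otherwise: just the hbr insn                 */
};

static enum example_hint_action
example_classify_hint (int dist)
{
  if (dist > 600)
    return EXAMPLE_NO_HINT;
  if (dist >= 588)
    return EXAMPLE_HINT_BLOCKAGE;
  if (dist <= 8 * 4)
    return EXAMPLE_HINT_PAD_NOPS;
  return EXAMPLE_HINT_PLAIN;
}
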
2248
2249/* Returns 0 if we don't want a hint for this branch. Otherwise return
2250 the rtx for the branch target. */
2251static rtx
2252get_branch_target (rtx branch)
2253{
2254 if (GET_CODE (branch) == JUMP_INSN)
2255 {
2256 rtx set, src;
2257
2258 /* Return statements */
2259 if (GET_CODE (PATTERN (branch)) == RETURN)
2260 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2261
2262 /* jump table */
2263 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2264 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2265 return 0;
2266
2267 set = single_set (branch);
2268 src = SET_SRC (set);
2269 if (GET_CODE (SET_DEST (set)) != PC)
2270 abort ();
2271
2272 if (GET_CODE (src) == IF_THEN_ELSE)
2273 {
2274 rtx lab = 0;
2275 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2276 if (note)
2277 {
2278 /* If the more probable case is not a fall through, then
2279 try a branch hint. */
2280 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2281 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2282 && GET_CODE (XEXP (src, 1)) != PC)
2283 lab = XEXP (src, 1);
2284 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2285 && GET_CODE (XEXP (src, 2)) != PC)
2286 lab = XEXP (src, 2);
2287 }
2288 if (lab)
2289 {
2290 if (GET_CODE (lab) == RETURN)
2291 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2292 return lab;
2293 }
2294 return 0;
2295 }
2296
2297 return src;
2298 }
2299 else if (GET_CODE (branch) == CALL_INSN)
2300 {
2301 rtx call;
2302 /* All of our call patterns are in a PARALLEL and the CALL is
2303 the first pattern in the PARALLEL. */
2304 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2305 abort ();
2306 call = XVECEXP (PATTERN (branch), 0, 0);
2307 if (GET_CODE (call) == SET)
2308 call = SET_SRC (call);
2309 if (GET_CODE (call) != CALL)
2310 abort ();
2311 return XEXP (XEXP (call, 0), 0);
2312 }
2313 return 0;
2314}
2315
5a976006 2316/* The special $hbr register is used to prevent the insn scheduler from
2317 moving hbr insns across instructions which invalidate them. It
2318 should only be used in a clobber, and this function searches for
2319 insns which clobber it. */
2320static bool
2321insn_clobbers_hbr (rtx insn)
2322{
2323 if (INSN_P (insn)
2324 && GET_CODE (PATTERN (insn)) == PARALLEL)
2325 {
2326 rtx parallel = PATTERN (insn);
2327 rtx clobber;
2328 int j;
2329 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2330 {
2331 clobber = XVECEXP (parallel, 0, j);
2332 if (GET_CODE (clobber) == CLOBBER
2333 && GET_CODE (XEXP (clobber, 0)) == REG
2334 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2335 return 1;
2336 }
2337 }
2338 return 0;
2339}
2340
2341/* Search up to 32 insns starting at FIRST:
2342 - at any kind of hinted branch, just return
2343 - at any unconditional branch in the first 15 insns, just return
2344 - at a call or indirect branch, after the first 15 insns, force it to
2345 an even address and return
2346 - at any unconditional branch, after the first 15 insns, force it to
2347 an even address.
 2348 	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2349 and an hbrp within 16 instructions of FIRST.
2350 */
644459d0 2351static void
5a976006 2352insert_hbrp_for_ilb_runout (rtx first)
644459d0 2353{
5a976006 2354 rtx insn, before_4 = 0, before_16 = 0;
2355 int addr = 0, length, first_addr = -1;
2356 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2357 int insert_lnop_after = 0;
2358 for (insn = first; insn; insn = NEXT_INSN (insn))
2359 if (INSN_P (insn))
2360 {
2361 if (first_addr == -1)
2362 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2363 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2364 length = get_attr_length (insn);
2365
2366 if (before_4 == 0 && addr + length >= 4 * 4)
2367 before_4 = insn;
2368 /* We test for 14 instructions because the first hbrp will add
2369 up to 2 instructions. */
2370 if (before_16 == 0 && addr + length >= 14 * 4)
2371 before_16 = insn;
2372
2373 if (INSN_CODE (insn) == CODE_FOR_hbr)
2374 {
2375 /* Make sure an hbrp is at least 2 cycles away from a hint.
2376 Insert an lnop after the hbrp when necessary. */
2377 if (before_4 == 0 && addr > 0)
2378 {
2379 before_4 = insn;
2380 insert_lnop_after |= 1;
2381 }
2382 else if (before_4 && addr <= 4 * 4)
2383 insert_lnop_after |= 1;
2384 if (before_16 == 0 && addr > 10 * 4)
2385 {
2386 before_16 = insn;
2387 insert_lnop_after |= 2;
2388 }
2389 else if (before_16 && addr <= 14 * 4)
2390 insert_lnop_after |= 2;
2391 }
644459d0 2392
5a976006 2393 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2394 {
2395 if (addr < hbrp_addr0)
2396 hbrp_addr0 = addr;
2397 else if (addr < hbrp_addr1)
2398 hbrp_addr1 = addr;
2399 }
644459d0 2400
5a976006 2401 if (CALL_P (insn) || JUMP_P (insn))
2402 {
2403 if (HINTED_P (insn))
2404 return;
2405
2406 /* Any branch after the first 15 insns should be on an even
2407 address to avoid a special case branch. There might be
2408 some nops and/or hbrps inserted, so we test after 10
2409 insns. */
2410 if (addr > 10 * 4)
2411 SCHED_ON_EVEN_P (insn) = 1;
2412 }
644459d0 2413
5a976006 2414 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2415 return;
2416
2417
2418 if (addr + length >= 32 * 4)
644459d0 2419 {
5a976006 2420 gcc_assert (before_4 && before_16);
2421 if (hbrp_addr0 > 4 * 4)
644459d0 2422 {
5a976006 2423 insn =
2424 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2425 recog_memoized (insn);
2426 INSN_ADDRESSES_NEW (insn,
2427 INSN_ADDRESSES (INSN_UID (before_4)));
2428 PUT_MODE (insn, GET_MODE (before_4));
2429 PUT_MODE (before_4, TImode);
2430 if (insert_lnop_after & 1)
644459d0 2431 {
5a976006 2432 insn = emit_insn_before (gen_lnop (), before_4);
2433 recog_memoized (insn);
2434 INSN_ADDRESSES_NEW (insn,
2435 INSN_ADDRESSES (INSN_UID (before_4)));
2436 PUT_MODE (insn, TImode);
644459d0 2437 }
644459d0 2438 }
5a976006 2439 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2440 && hbrp_addr1 > 16 * 4)
644459d0 2441 {
5a976006 2442 insn =
2443 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2444 recog_memoized (insn);
2445 INSN_ADDRESSES_NEW (insn,
2446 INSN_ADDRESSES (INSN_UID (before_16)));
2447 PUT_MODE (insn, GET_MODE (before_16));
2448 PUT_MODE (before_16, TImode);
2449 if (insert_lnop_after & 2)
644459d0 2450 {
5a976006 2451 insn = emit_insn_before (gen_lnop (), before_16);
2452 recog_memoized (insn);
2453 INSN_ADDRESSES_NEW (insn,
2454 INSN_ADDRESSES (INSN_UID
2455 (before_16)));
2456 PUT_MODE (insn, TImode);
644459d0 2457 }
2458 }
5a976006 2459 return;
644459d0 2460 }
644459d0 2461 }
5a976006 2462 else if (BARRIER_P (insn))
2463 return;
644459d0 2464
644459d0 2465}
5a976006 2466
2467/* The SPU might hang when it executes 48 inline instructions after a
2468 hinted branch jumps to its hinted target. The beginning of a
2469 function and the return from a call might have been hinted, and must
2470 be handled as well. To prevent a hang we insert 2 hbrps. The first
2471 should be within 6 insns of the branch target. The second should be
2472 within 22 insns of the branch target. When determining if hbrps are
 2473 	   necessary, we look for only 32 inline instructions, because up to
 2474 	   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2475 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2476static void
5a976006 2477insert_hbrp (void)
644459d0 2478{
5a976006 2479 rtx insn;
2480 if (TARGET_SAFE_HINTS)
644459d0 2481 {
5a976006 2482 shorten_branches (get_insns ());
2483 /* Insert hbrp at beginning of function */
2484 insn = next_active_insn (get_insns ());
2485 if (insn)
2486 insert_hbrp_for_ilb_runout (insn);
2487 /* Insert hbrp after hinted targets. */
2488 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2489 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2490 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2491 }
644459d0 2492}
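
/* Illustrative sketch, not part of spu.c: the decision made at the end of
   the 32-insn (128-byte) scan in insert_hbrp_for_ilb_runout.  ADDR0 and
   ADDR1 are the byte offsets of the first and second iprefetch (hbrp)
   insns already present after the hinted target, or 128 * 4 when there is
   none.  The example_* name is hypothetical.  */

static void
example_hbrps_needed (int addr0, int addr1, int *need_first, int *need_second)
{
  /* First hbrp: wanted within the first 4 insns (16 bytes).  */
  *need_first = addr0 > 4 * 4;
  /* Second hbrp: wanted within 16 insns (64 bytes); an existing hbrp that
     falls between insn 5 and insn 16 already satisfies it.  */
  *need_second = (addr0 <= 4 * 4 || addr0 > 16 * 4) && addr1 > 16 * 4;
}
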
2493
5a976006 2494static int in_spu_reorg;
2495
2496/* Insert branch hints. There are no branch optimizations after this
2497 pass, so it's safe to set our branch hints now. */
644459d0 2498static void
5a976006 2499spu_machine_dependent_reorg (void)
644459d0 2500{
5a976006 2501 sbitmap blocks;
2502 basic_block bb;
2503 rtx branch, insn;
2504 rtx branch_target = 0;
2505 int branch_addr = 0, insn_addr, required_dist = 0;
2506 int i;
2507 unsigned int j;
644459d0 2508
5a976006 2509 if (!TARGET_BRANCH_HINTS || optimize == 0)
2510 {
2511 /* We still do it for unoptimized code because an external
2512 function might have hinted a call or return. */
2513 insert_hbrp ();
2514 pad_bb ();
2515 return;
2516 }
644459d0 2517
5a976006 2518 blocks = sbitmap_alloc (last_basic_block);
2519 sbitmap_zero (blocks);
644459d0 2520
5a976006 2521 in_spu_reorg = 1;
2522 compute_bb_for_insn ();
2523
2524 compact_blocks ();
2525
2526 spu_bb_info =
2527 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2528 sizeof (struct spu_bb_info));
2529
2530 /* We need exact insn addresses and lengths. */
2531 shorten_branches (get_insns ());
2532
2533 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2534 {
5a976006 2535 bb = BASIC_BLOCK (i);
2536 branch = 0;
2537 if (spu_bb_info[i].prop_jump)
644459d0 2538 {
5a976006 2539 branch = spu_bb_info[i].prop_jump;
2540 branch_target = get_branch_target (branch);
2541 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2542 required_dist = spu_hint_dist;
2543 }
2544 /* Search from end of a block to beginning. In this loop, find
 2545 	         jumps which need a branch hint and emit the hint only when:
2546 - it's an indirect branch and we're at the insn which sets
2547 the register
2548 - we're at an insn that will invalidate the hint. e.g., a
2549 call, another hint insn, inline asm that clobbers $hbr, and
2550 some inlined operations (divmodsi4). Don't consider jumps
2551 because they are only at the end of a block and are
2552 considered when we are deciding whether to propagate
2553 - we're getting too far away from the branch. The hbr insns
2554 only have a signed 10 bit offset
2555 We go back as far as possible so the branch will be considered
2556 for propagation when we get to the beginning of the block. */
2557 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2558 {
2559 if (INSN_P (insn))
2560 {
2561 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2562 if (branch
2563 && ((GET_CODE (branch_target) == REG
2564 && set_of (branch_target, insn) != NULL_RTX)
2565 || insn_clobbers_hbr (insn)
2566 || branch_addr - insn_addr > 600))
2567 {
2568 rtx next = NEXT_INSN (insn);
2569 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2570 if (insn != BB_END (bb)
2571 && branch_addr - next_addr >= required_dist)
2572 {
2573 if (dump_file)
2574 fprintf (dump_file,
2575 "hint for %i in block %i before %i\n",
2576 INSN_UID (branch), bb->index,
2577 INSN_UID (next));
2578 spu_emit_branch_hint (next, branch, branch_target,
2579 branch_addr - next_addr, blocks);
2580 }
2581 branch = 0;
2582 }
2583
2584 /* JUMP_P will only be true at the end of a block. When
2585 branch is already set it means we've previously decided
2586 to propagate a hint for that branch into this block. */
2587 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2588 {
2589 branch = 0;
2590 if ((branch_target = get_branch_target (insn)))
2591 {
2592 branch = insn;
2593 branch_addr = insn_addr;
2594 required_dist = spu_hint_dist;
2595 }
2596 }
2597 }
2598 if (insn == BB_HEAD (bb))
2599 break;
2600 }
2601
2602 if (branch)
2603 {
2604 /* If we haven't emitted a hint for this branch yet, it might
2605 be profitable to emit it in one of the predecessor blocks,
2606 especially for loops. */
2607 rtx bbend;
2608 basic_block prev = 0, prop = 0, prev2 = 0;
2609 int loop_exit = 0, simple_loop = 0;
2610 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2611
2612 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2613 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2614 prev = EDGE_PRED (bb, j)->src;
2615 else
2616 prev2 = EDGE_PRED (bb, j)->src;
2617
2618 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2619 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2620 loop_exit = 1;
2621 else if (EDGE_SUCC (bb, j)->dest == bb)
2622 simple_loop = 1;
2623
2624 /* If this branch is a loop exit then propagate to previous
2625 fallthru block. This catches the cases when it is a simple
2626 loop or when there is an initial branch into the loop. */
2627 if (prev && (loop_exit || simple_loop)
2628 && prev->loop_depth <= bb->loop_depth)
2629 prop = prev;
2630
 2631 	      /* If there is only one adjacent predecessor, don't propagate
2632 outside this loop. This loop_depth test isn't perfect, but
2633 I'm not sure the loop_father member is valid at this point. */
2634 else if (prev && single_pred_p (bb)
2635 && prev->loop_depth == bb->loop_depth)
2636 prop = prev;
2637
2638 /* If this is the JOIN block of a simple IF-THEN then
 2639 	         propagate the hint to the HEADER block. */
2640 else if (prev && prev2
2641 && EDGE_COUNT (bb->preds) == 2
2642 && EDGE_COUNT (prev->preds) == 1
2643 && EDGE_PRED (prev, 0)->src == prev2
2644 && prev2->loop_depth == bb->loop_depth
2645 && GET_CODE (branch_target) != REG)
2646 prop = prev;
2647
2648 /* Don't propagate when:
2649 - this is a simple loop and the hint would be too far
2650 - this is not a simple loop and there are 16 insns in
2651 this block already
2652 - the predecessor block ends in a branch that will be
2653 hinted
2654 - the predecessor block ends in an insn that invalidates
2655 the hint */
2656 if (prop
2657 && prop->index >= 0
2658 && (bbend = BB_END (prop))
2659 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2660 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2661 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2662 {
2663 if (dump_file)
2664 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2665 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2666 bb->index, prop->index, bb->loop_depth,
2667 INSN_UID (branch), loop_exit, simple_loop,
2668 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2669
2670 spu_bb_info[prop->index].prop_jump = branch;
2671 spu_bb_info[prop->index].bb_index = i;
2672 }
2673 else if (branch_addr - next_addr >= required_dist)
2674 {
2675 if (dump_file)
2676 fprintf (dump_file, "hint for %i in block %i before %i\n",
2677 INSN_UID (branch), bb->index,
2678 INSN_UID (NEXT_INSN (insn)));
2679 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2680 branch_addr - next_addr, blocks);
2681 }
2682 branch = 0;
644459d0 2683 }
644459d0 2684 }
5a976006 2685 free (spu_bb_info);
644459d0 2686
5a976006 2687 if (!sbitmap_empty_p (blocks))
2688 find_many_sub_basic_blocks (blocks);
2689
2690 /* We have to schedule to make sure alignment is ok. */
2691 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2692
2693 /* The hints need to be scheduled, so call it again. */
2694 schedule_insns ();
2695
2696 insert_hbrp ();
2697
2698 pad_bb ();
2699
2700
2701 if (spu_flag_var_tracking)
644459d0 2702 {
5a976006 2703 df_analyze ();
2704 timevar_push (TV_VAR_TRACKING);
2705 variable_tracking_main ();
2706 timevar_pop (TV_VAR_TRACKING);
2707 df_finish_pass (false);
644459d0 2708 }
5a976006 2709
2710 free_bb_for_insn ();
2711
2712 in_spu_reorg = 0;
644459d0 2713}
2714\f
2715
2716/* Insn scheduling routines, primarily for dual issue. */
2717static int
2718spu_sched_issue_rate (void)
2719{
2720 return 2;
2721}
2722
2723static int
5a976006 2724uses_ls_unit(rtx insn)
644459d0 2725{
5a976006 2726 rtx set = single_set (insn);
2727 if (set != 0
2728 && (GET_CODE (SET_DEST (set)) == MEM
2729 || GET_CODE (SET_SRC (set)) == MEM))
2730 return 1;
2731 return 0;
644459d0 2732}
2733
2734static int
2735get_pipe (rtx insn)
2736{
2737 enum attr_type t;
2738 /* Handle inline asm */
2739 if (INSN_CODE (insn) == -1)
2740 return -1;
2741 t = get_attr_type (insn);
2742 switch (t)
2743 {
2744 case TYPE_CONVERT:
2745 return -2;
2746 case TYPE_MULTI0:
2747 return -1;
2748
2749 case TYPE_FX2:
2750 case TYPE_FX3:
2751 case TYPE_SPR:
2752 case TYPE_NOP:
2753 case TYPE_FXB:
2754 case TYPE_FPD:
2755 case TYPE_FP6:
2756 case TYPE_FP7:
644459d0 2757 return 0;
2758
2759 case TYPE_LNOP:
2760 case TYPE_SHUF:
2761 case TYPE_LOAD:
2762 case TYPE_STORE:
2763 case TYPE_BR:
2764 case TYPE_MULTI1:
2765 case TYPE_HBR:
5a976006 2766 case TYPE_IPREFETCH:
644459d0 2767 return 1;
2768 default:
2769 abort ();
2770 }
2771}
2772
5a976006 2773
2774/* haifa-sched.c has a static variable that keeps track of the current
2775 cycle. It is passed to spu_sched_reorder, and we record it here for
2776 use by spu_sched_variable_issue. It won't be accurate if the
 2777 	   scheduler updates its clock_var between the two calls. */
2778static int clock_var;
2779
2780/* This is used to keep track of insn alignment. Set to 0 at the
2781 beginning of each block and increased by the "length" attr of each
2782 insn scheduled. */
2783static int spu_sched_length;
2784
2785/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2786 ready list appropriately in spu_sched_reorder(). */
2787static int pipe0_clock;
2788static int pipe1_clock;
2789
2790static int prev_clock_var;
2791
2792static int prev_priority;
2793
2794/* The SPU needs to load the next ilb sometime during the execution of
2795 the previous ilb. There is a potential conflict if every cycle has a
2796 load or store. To avoid the conflict we make sure the load/store
2797 unit is free for at least one cycle during the execution of insns in
2798 the previous ilb. */
2799static int spu_ls_first;
2800static int prev_ls_clock;
2801
2802static void
2803spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2804 int max_ready ATTRIBUTE_UNUSED)
2805{
2806 spu_sched_length = 0;
2807}
2808
2809static void
2810spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2811 int max_ready ATTRIBUTE_UNUSED)
2812{
2813 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2814 {
2815 /* When any block might be at least 8-byte aligned, assume they
2816 will all be at least 8-byte aligned to make sure dual issue
2817 works out correctly. */
2818 spu_sched_length = 0;
2819 }
2820 spu_ls_first = INT_MAX;
2821 clock_var = -1;
2822 prev_ls_clock = -1;
2823 pipe0_clock = -1;
2824 pipe1_clock = -1;
2825 prev_clock_var = -1;
2826 prev_priority = -1;
2827}
2828
644459d0 2829static int
5a976006 2830spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2831 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2832{
5a976006 2833 int len;
2834 int p;
644459d0 2835 if (GET_CODE (PATTERN (insn)) == USE
2836 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2837 || (len = get_attr_length (insn)) == 0)
2838 return more;
2839
2840 spu_sched_length += len;
2841
2842 /* Reset on inline asm */
2843 if (INSN_CODE (insn) == -1)
2844 {
2845 spu_ls_first = INT_MAX;
2846 pipe0_clock = -1;
2847 pipe1_clock = -1;
2848 return 0;
2849 }
2850 p = get_pipe (insn);
2851 if (p == 0)
2852 pipe0_clock = clock_var;
2853 else
2854 pipe1_clock = clock_var;
2855
2856 if (in_spu_reorg)
2857 {
2858 if (clock_var - prev_ls_clock > 1
2859 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2860 spu_ls_first = INT_MAX;
2861 if (uses_ls_unit (insn))
2862 {
2863 if (spu_ls_first == INT_MAX)
2864 spu_ls_first = spu_sched_length;
2865 prev_ls_clock = clock_var;
2866 }
2867
2868 /* The scheduler hasn't inserted the nop, but we will later on.
2869 Include those nops in spu_sched_length. */
2870 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2871 spu_sched_length += 4;
2872 prev_clock_var = clock_var;
2873
2874 /* more is -1 when called from spu_sched_reorder for new insns
2875 that don't have INSN_PRIORITY */
2876 if (more >= 0)
2877 prev_priority = INSN_PRIORITY (insn);
2878 }
2879
 2880 	  /* Always try issuing more insns.  spu_sched_reorder will decide
2881 when the cycle should be advanced. */
2882 return 1;
2883}
2884
2885/* This function is called for both TARGET_SCHED_REORDER and
2886 TARGET_SCHED_REORDER2. */
2887static int
2888spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2889 rtx *ready, int *nreadyp, int clock)
2890{
2891 int i, nready = *nreadyp;
2892 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2893 rtx insn;
2894
2895 clock_var = clock;
2896
2897 if (nready <= 0 || pipe1_clock >= clock)
2898 return 0;
2899
2900 /* Find any rtl insns that don't generate assembly insns and schedule
2901 them first. */
2902 for (i = nready - 1; i >= 0; i--)
2903 {
2904 insn = ready[i];
2905 if (INSN_CODE (insn) == -1
2906 || INSN_CODE (insn) == CODE_FOR_blockage
2907 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2908 {
2909 ready[i] = ready[nready - 1];
2910 ready[nready - 1] = insn;
2911 return 1;
2912 }
2913 }
2914
2915 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2916 for (i = 0; i < nready; i++)
2917 if (INSN_CODE (ready[i]) != -1)
2918 {
2919 insn = ready[i];
2920 switch (get_attr_type (insn))
2921 {
2922 default:
2923 case TYPE_MULTI0:
2924 case TYPE_CONVERT:
2925 case TYPE_FX2:
2926 case TYPE_FX3:
2927 case TYPE_SPR:
2928 case TYPE_NOP:
2929 case TYPE_FXB:
2930 case TYPE_FPD:
2931 case TYPE_FP6:
2932 case TYPE_FP7:
2933 pipe_0 = i;
2934 break;
2935 case TYPE_LOAD:
2936 case TYPE_STORE:
2937 pipe_ls = i;
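	  /* Fall through: loads and stores also issue on pipe 1.  */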
2938 case TYPE_LNOP:
2939 case TYPE_SHUF:
2940 case TYPE_BR:
2941 case TYPE_MULTI1:
2942 case TYPE_HBR:
2943 pipe_1 = i;
2944 break;
2945 case TYPE_IPREFETCH:
2946 pipe_hbrp = i;
2947 break;
2948 }
2949 }
2950
2951 /* In the first scheduling phase, schedule loads and stores together
2952 to increase the chance they will get merged during postreload CSE. */
2953 if (!reload_completed && pipe_ls >= 0)
2954 {
2955 insn = ready[pipe_ls];
2956 ready[pipe_ls] = ready[nready - 1];
2957 ready[nready - 1] = insn;
2958 return 1;
2959 }
2960
2961 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2962 if (pipe_hbrp >= 0)
2963 pipe_1 = pipe_hbrp;
2964
2965 /* When we have loads/stores in every cycle of the last 15 insns and
2966 we are about to schedule another load/store, emit an hbrp insn
2967 instead. */
2968 if (in_spu_reorg
2969 && spu_sched_length - spu_ls_first >= 4 * 15
2970 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2971 {
2972 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2973 recog_memoized (insn);
2974 if (pipe0_clock < clock)
2975 PUT_MODE (insn, TImode);
2976 spu_sched_variable_issue (file, verbose, insn, -1);
2977 return 0;
2978 }
2979
2980 /* In general, we want to emit nops to increase dual issue, but dual
2981 issue isn't faster when one of the insns could be scheduled later
 2982 	     without affecting the critical path.  We look at INSN_PRIORITY to
 2983 	     make a good guess, but it isn't perfect, so -mdual-nops=n can be
 2984 	     used to tune the heuristic. */
2985 if (in_spu_reorg && spu_dual_nops < 10)
2986 {
 2987 	      /* When we are at an even address and we are not issuing nops to
2988 improve scheduling then we need to advance the cycle. */
2989 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2990 && (spu_dual_nops == 0
2991 || (pipe_1 != -1
2992 && prev_priority >
2993 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2994 return 0;
2995
2996 /* When at an odd address, schedule the highest priority insn
2997 without considering pipeline. */
2998 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2999 && (spu_dual_nops == 0
3000 || (prev_priority >
3001 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3002 return 1;
3003 }
3004
3005
 3006 	  /* We haven't issued a pipe0 insn yet this cycle; if there is a
3007 pipe0 insn in the ready list, schedule it. */
3008 if (pipe0_clock < clock && pipe_0 >= 0)
3009 schedule_i = pipe_0;
3010
3011 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3012 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3013 else
3014 schedule_i = pipe_1;
3015
3016 if (schedule_i > -1)
3017 {
3018 insn = ready[schedule_i];
3019 ready[schedule_i] = ready[nready - 1];
3020 ready[nready - 1] = insn;
3021 return 1;
3022 }
3023 return 0;
644459d0 3024}
3025
3026/* INSN is dependent on DEP_INSN. */
3027static int
5a976006 3028spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3029{
5a976006 3030 rtx set;
3031
3032 /* The blockage pattern is used to prevent instructions from being
3033 moved across it and has no cost. */
3034 if (INSN_CODE (insn) == CODE_FOR_blockage
3035 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3036 return 0;
3037
3038 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3039 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3040 return 0;
3041
3042 /* Make sure hbrps are spread out. */
3043 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3044 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3045 return 8;
3046
3047 /* Make sure hints and hbrps are 2 cycles apart. */
3048 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (insn) == CODE_FOR_hbr)
3050 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3051 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3052 return 2;
3053
3054 /* An hbrp has no real dependency on other insns. */
3055 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3056 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3057 return 0;
3058
3059 /* Assuming that it is unlikely an argument register will be used in
3060 the first cycle of the called function, we reduce the cost for
3061 slightly better scheduling of dep_insn. When not hinted, the
3062 mispredicted branch would hide the cost as well. */
3063 if (CALL_P (insn))
3064 {
3065 rtx target = get_branch_target (insn);
3066 if (GET_CODE (target) != REG || !set_of (target, insn))
3067 return cost - 2;
3068 return cost;
3069 }
3070
3071 /* And when returning from a function, let's assume the return values
3072 are completed sooner too. */
3073 if (CALL_P (dep_insn))
644459d0 3074 return cost - 2;
5a976006 3075
 3076 	  /* Make sure an instruction that loads from the back chain is scheduled
3077 away from the return instruction so a hint is more likely to get
3078 issued. */
3079 if (INSN_CODE (insn) == CODE_FOR__return
3080 && (set = single_set (dep_insn))
3081 && GET_CODE (SET_DEST (set)) == REG
3082 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3083 return 20;
3084
644459d0 3085 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3086 scheduler makes every insn in a block anti-dependent on the final
3087 jump_insn. We adjust here so higher cost insns will get scheduled
3088 earlier. */
5a976006 3089 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3090 return insn_cost (dep_insn) - 3;
5a976006 3091
644459d0 3092 return cost;
3093}
3094\f
3095/* Create a CONST_DOUBLE from a string. */
3096struct rtx_def *
3097spu_float_const (const char *string, enum machine_mode mode)
3098{
3099 REAL_VALUE_TYPE value;
3100 value = REAL_VALUE_ATOF (string, mode);
3101 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3102}
3103
644459d0 3104int
3105spu_constant_address_p (rtx x)
3106{
3107 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3108 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3109 || GET_CODE (x) == HIGH);
3110}
3111
3112static enum spu_immediate
3113which_immediate_load (HOST_WIDE_INT val)
3114{
3115 gcc_assert (val == trunc_int_for_mode (val, SImode));
3116
3117 if (val >= -0x8000 && val <= 0x7fff)
3118 return SPU_IL;
3119 if (val >= 0 && val <= 0x3ffff)
3120 return SPU_ILA;
3121 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3122 return SPU_ILH;
3123 if ((val & 0xffff) == 0)
3124 return SPU_ILHU;
3125
3126 return SPU_NONE;
3127}
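
/* Illustrative sketch, not part of spu.c: which il-family instruction the
   classification above picks for a few 32-bit values.  The ranges are
   restated locally so the sketch is self-contained; all example_/EX_
   names are hypothetical.  */

#include <assert.h>

enum example_il { EX_IL, EX_ILA, EX_ILH, EX_ILHU, EX_NONE };

static enum example_il
example_which_il (long long val)
{
  if (val >= -0x8000 && val <= 0x7fff)
    return EX_IL;                    /* 16-bit signed immediate      */
  if (val >= 0 && val <= 0x3ffff)
    return EX_ILA;                   /* 18-bit unsigned immediate    */
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return EX_ILH;                   /* same halfword repeated twice */
  if ((val & 0xffff) == 0)
    return EX_ILHU;                  /* only the upper halfword set  */
  return EX_NONE;
}

int
main (void)
{
  assert (example_which_il (-1) == EX_IL);
  assert (example_which_il (0x12345) == EX_ILA);
  assert (example_which_il (0x00050005) == EX_ILH);
  assert (example_which_il (0x12340000) == EX_ILHU);
  assert (example_which_il (0x12345678) == EX_NONE);
  return 0;
}
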
3128
dea01258 3129/* Return true when OP can be loaded by one of the il instructions, or
3130 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3131int
3132immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3133{
3134 if (CONSTANT_P (op))
3135 {
3136 enum immediate_class c = classify_immediate (op, mode);
5df189be 3137 return c == IC_IL1 || c == IC_IL1s
3072d30e 3138 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3139 }
3140 return 0;
3141}
3142
 3143 /* Return true if the first SIZE bytes of ARR form a constant that can be
3144 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3145 represent the size and offset of the instruction to use. */
3146static int
3147cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3148{
3149 int cpat, run, i, start;
3150 cpat = 1;
3151 run = 0;
3152 start = -1;
3153 for (i = 0; i < size && cpat; i++)
3154 if (arr[i] != i+16)
3155 {
3156 if (!run)
3157 {
3158 start = i;
3159 if (arr[i] == 3)
3160 run = 1;
3161 else if (arr[i] == 2 && arr[i+1] == 3)
3162 run = 2;
3163 else if (arr[i] == 0)
3164 {
3165 while (arr[i+run] == run && i+run < 16)
3166 run++;
3167 if (run != 4 && run != 8)
3168 cpat = 0;
3169 }
3170 else
3171 cpat = 0;
3172 if ((i & (run-1)) != 0)
3173 cpat = 0;
3174 i += run;
3175 }
3176 else
3177 cpat = 0;
3178 }
b01a6dc3 3179 if (cpat && (run || size < 16))
dea01258 3180 {
3181 if (run == 0)
3182 run = 1;
3183 if (prun)
3184 *prun = run;
3185 if (pstart)
3186 *pstart = start == -1 ? 16-run : start;
3187 return 1;
3188 }
3189 return 0;
3190}
3191
3192/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3193 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3194static enum immediate_class
3195classify_immediate (rtx op, enum machine_mode mode)
644459d0 3196{
3197 HOST_WIDE_INT val;
3198 unsigned char arr[16];
5df189be 3199 int i, j, repeated, fsmbi, repeat;
dea01258 3200
3201 gcc_assert (CONSTANT_P (op));
3202
644459d0 3203 if (GET_MODE (op) != VOIDmode)
3204 mode = GET_MODE (op);
3205
dea01258 3206 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3207 if (!flag_pic
3208 && mode == V4SImode
dea01258 3209 && GET_CODE (op) == CONST_VECTOR
3210 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3211 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3212 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3213 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3214 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3215 op = CONST_VECTOR_ELT (op, 0);
644459d0 3216
dea01258 3217 switch (GET_CODE (op))
3218 {
3219 case SYMBOL_REF:
3220 case LABEL_REF:
3221 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3222
dea01258 3223 case CONST:
0cfc65d4 3224 /* We can never know if the resulting address fits in 18 bits and can be
3225 loaded with ila. For now, assume the address will not overflow if
3226 the displacement is "small" (fits 'K' constraint). */
3227 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3228 {
3229 rtx sym = XEXP (XEXP (op, 0), 0);
3230 rtx cst = XEXP (XEXP (op, 0), 1);
3231
3232 if (GET_CODE (sym) == SYMBOL_REF
3233 && GET_CODE (cst) == CONST_INT
3234 && satisfies_constraint_K (cst))
3235 return IC_IL1s;
3236 }
3237 return IC_IL2s;
644459d0 3238
dea01258 3239 case HIGH:
3240 return IC_IL1s;
3241
3242 case CONST_VECTOR:
3243 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3244 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3245 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3246 return IC_POOL;
3247 /* Fall through. */
3248
3249 case CONST_INT:
3250 case CONST_DOUBLE:
3251 constant_to_array (mode, op, arr);
644459d0 3252
dea01258 3253 /* Check that each 4-byte slot is identical. */
3254 repeated = 1;
3255 for (i = 4; i < 16; i += 4)
3256 for (j = 0; j < 4; j++)
3257 if (arr[j] != arr[i + j])
3258 repeated = 0;
3259
3260 if (repeated)
3261 {
3262 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3263 val = trunc_int_for_mode (val, SImode);
3264
3265 if (which_immediate_load (val) != SPU_NONE)
3266 return IC_IL1;
3267 }
3268
3269 /* Any mode of 2 bytes or smaller can be loaded with an il
3270 instruction. */
3271 gcc_assert (GET_MODE_SIZE (mode) > 2);
3272
3273 fsmbi = 1;
5df189be 3274 repeat = 0;
dea01258 3275 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3276 if (arr[i] != 0 && repeat == 0)
3277 repeat = arr[i];
3278 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3279 fsmbi = 0;
3280 if (fsmbi)
5df189be 3281 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3282
3283 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3284 return IC_CPAT;
3285
3286 if (repeated)
3287 return IC_IL2;
3288
3289 return IC_POOL;
3290 default:
3291 break;
3292 }
3293 gcc_unreachable ();
644459d0 3294}
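
/* Illustrative sketch, not part of spu.c: the fsmbi test performed above.
   A constant qualifies when every byte of its 16-byte image is either 0 or
   one single repeated value; if that value is exactly 0xff a lone fsmbi
   insn builds it (IC_FSMBI), otherwise a follow-up insn is needed as well
   (IC_FSMBI2).  The example_* name is hypothetical.  */

#include <assert.h>
#include <string.h>

static int
example_is_fsmbi_pattern (const unsigned char arr[16], unsigned char *repeat)
{
  int i;
  *repeat = 0;
  for (i = 0; i < 16; i++)
    if (arr[i] != 0)
      {
        if (*repeat == 0)
          *repeat = arr[i];
        else if (arr[i] != *repeat)
          return 0;
      }
  return 1;
}

int
main (void)
{
  unsigned char arr[16], rep;
  memset (arr, 0, 16);
  arr[0] = arr[5] = 0xff;        /* only 0x00 and 0xff bytes: IC_FSMBI   */
  assert (example_is_fsmbi_pattern (arr, &rep) && rep == 0xff);
  arr[5] = 0x7f;                 /* two different nonzero bytes: neither */
  assert (!example_is_fsmbi_pattern (arr, &rep));
  return 0;
}
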
3295
3296static enum spu_immediate
3297which_logical_immediate (HOST_WIDE_INT val)
3298{
3299 gcc_assert (val == trunc_int_for_mode (val, SImode));
3300
3301 if (val >= -0x200 && val <= 0x1ff)
3302 return SPU_ORI;
3303 if (val >= 0 && val <= 0xffff)
3304 return SPU_IOHL;
3305 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3306 {
3307 val = trunc_int_for_mode (val, HImode);
3308 if (val >= -0x200 && val <= 0x1ff)
3309 return SPU_ORHI;
3310 if ((val & 0xff) == ((val >> 8) & 0xff))
3311 {
3312 val = trunc_int_for_mode (val, QImode);
3313 if (val >= -0x200 && val <= 0x1ff)
3314 return SPU_ORBI;
3315 }
3316 }
3317 return SPU_NONE;
3318}
3319
5df189be 3320/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3321 CONST_DOUBLEs. */
3322static int
3323const_vector_immediate_p (rtx x)
3324{
3325 int i;
3326 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3327 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3328 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3329 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3330 return 0;
3331 return 1;
3332}
3333
644459d0 3334int
3335logical_immediate_p (rtx op, enum machine_mode mode)
3336{
3337 HOST_WIDE_INT val;
3338 unsigned char arr[16];
3339 int i, j;
3340
3341 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3342 || GET_CODE (op) == CONST_VECTOR);
3343
5df189be 3344 if (GET_CODE (op) == CONST_VECTOR
3345 && !const_vector_immediate_p (op))
3346 return 0;
3347
644459d0 3348 if (GET_MODE (op) != VOIDmode)
3349 mode = GET_MODE (op);
3350
3351 constant_to_array (mode, op, arr);
3352
3353 /* Check that bytes are repeated. */
3354 for (i = 4; i < 16; i += 4)
3355 for (j = 0; j < 4; j++)
3356 if (arr[j] != arr[i + j])
3357 return 0;
3358
3359 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3360 val = trunc_int_for_mode (val, SImode);
3361
3362 i = which_logical_immediate (val);
3363 return i != SPU_NONE && i != SPU_IOHL;
3364}
3365
3366int
3367iohl_immediate_p (rtx op, enum machine_mode mode)
3368{
3369 HOST_WIDE_INT val;
3370 unsigned char arr[16];
3371 int i, j;
3372
3373 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3374 || GET_CODE (op) == CONST_VECTOR);
3375
5df189be 3376 if (GET_CODE (op) == CONST_VECTOR
3377 && !const_vector_immediate_p (op))
3378 return 0;
3379
644459d0 3380 if (GET_MODE (op) != VOIDmode)
3381 mode = GET_MODE (op);
3382
3383 constant_to_array (mode, op, arr);
3384
3385 /* Check that bytes are repeated. */
3386 for (i = 4; i < 16; i += 4)
3387 for (j = 0; j < 4; j++)
3388 if (arr[j] != arr[i + j])
3389 return 0;
3390
3391 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3392 val = trunc_int_for_mode (val, SImode);
3393
3394 return val >= 0 && val <= 0xffff;
3395}
3396
3397int
3398arith_immediate_p (rtx op, enum machine_mode mode,
3399 HOST_WIDE_INT low, HOST_WIDE_INT high)
3400{
3401 HOST_WIDE_INT val;
3402 unsigned char arr[16];
3403 int bytes, i, j;
3404
3405 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 || GET_CODE (op) == CONST_VECTOR);
3407
5df189be 3408 if (GET_CODE (op) == CONST_VECTOR
3409 && !const_vector_immediate_p (op))
3410 return 0;
3411
644459d0 3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3414
3415 constant_to_array (mode, op, arr);
3416
3417 if (VECTOR_MODE_P (mode))
3418 mode = GET_MODE_INNER (mode);
3419
3420 bytes = GET_MODE_SIZE (mode);
3421 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3422
3423 /* Check that bytes are repeated. */
3424 for (i = bytes; i < 16; i += bytes)
3425 for (j = 0; j < bytes; j++)
3426 if (arr[j] != arr[i + j])
3427 return 0;
3428
3429 val = arr[0];
3430 for (j = 1; j < bytes; j++)
3431 val = (val << 8) | arr[j];
3432
3433 val = trunc_int_for_mode (val, mode);
3434
3435 return val >= low && val <= high;
3436}
3437
3438/* We accept:
5b865faf 3439 - any 32-bit constant (SImode, SFmode)
644459d0 3440 - any constant that can be generated with fsmbi (any mode)
5b865faf 3441 - a 64-bit constant where the high and low bits are identical
644459d0 3442 (DImode, DFmode)
5b865faf 3443 - a 128-bit constant where the four 32-bit words match. */
644459d0 3444int
3445spu_legitimate_constant_p (rtx x)
3446{
5df189be 3447 if (GET_CODE (x) == HIGH)
3448 x = XEXP (x, 0);
644459d0 3449 /* V4SI with all identical symbols is valid. */
5df189be 3450 if (!flag_pic
3451 && GET_MODE (x) == V4SImode
644459d0 3452 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3453 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3454 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3455 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3456 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3457 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3458
5df189be 3459 if (GET_CODE (x) == CONST_VECTOR
3460 && !const_vector_immediate_p (x))
3461 return 0;
644459d0 3462 return 1;
3463}
3464
 3465 /* Valid addresses are:
3466 - symbol_ref, label_ref, const
3467 - reg
3468 - reg + const, where either reg or const is 16 byte aligned
3469 - reg + reg, alignment doesn't matter
3470 The alignment matters in the reg+const case because lqd and stqd
3471 ignore the 4 least significant bits of the const. (TODO: It might be
3472 preferable to allow any alignment and fix it up when splitting.) */
3473int
3474spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3475 rtx x, int reg_ok_strict)
3476{
3477 if (mode == TImode && GET_CODE (x) == AND
3478 && GET_CODE (XEXP (x, 1)) == CONST_INT
3479 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3480 x = XEXP (x, 0);
3481 switch (GET_CODE (x))
3482 {
3483 case SYMBOL_REF:
3484 case LABEL_REF:
3485 return !TARGET_LARGE_MEM;
3486
3487 case CONST:
0cfc65d4 3488 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3489 {
3490 rtx sym = XEXP (XEXP (x, 0), 0);
3491 rtx cst = XEXP (XEXP (x, 0), 1);
3492
3493 /* Accept any symbol_ref + constant, assuming it does not
3494 wrap around the local store addressability limit. */
3495 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3496 return 1;
3497 }
3498 return 0;
644459d0 3499
3500 case CONST_INT:
3501 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3502
3503 case SUBREG:
3504 x = XEXP (x, 0);
3505 gcc_assert (GET_CODE (x) == REG);
3506
3507 case REG:
3508 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3509
3510 case PLUS:
3511 case LO_SUM:
3512 {
3513 rtx op0 = XEXP (x, 0);
3514 rtx op1 = XEXP (x, 1);
3515 if (GET_CODE (op0) == SUBREG)
3516 op0 = XEXP (op0, 0);
3517 if (GET_CODE (op1) == SUBREG)
3518 op1 = XEXP (op1, 0);
3519 /* We can't just accept any aligned register because CSE can
3520 change it to a register that is not marked aligned and then
3521 recog will fail. So we only accept frame registers because
3522 they will only be changed to other frame registers. */
3523 if (GET_CODE (op0) == REG
3524 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3525 && GET_CODE (op1) == CONST_INT
3526 && INTVAL (op1) >= -0x2000
3527 && INTVAL (op1) <= 0x1fff
5df189be 3528 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3529 return 1;
3530 if (GET_CODE (op0) == REG
3531 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3532 && GET_CODE (op1) == REG
3533 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3534 return 1;
3535 }
3536 break;
3537
3538 default:
3539 break;
3540 }
3541 return 0;
3542}
3543
3544/* When the address is reg + const_int, force the const_int into a
fa7637bd 3545 register. */
644459d0 3546rtx
3547spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3548 enum machine_mode mode)
3549{
3550 rtx op0, op1;
3551 /* Make sure both operands are registers. */
3552 if (GET_CODE (x) == PLUS)
3553 {
3554 op0 = XEXP (x, 0);
3555 op1 = XEXP (x, 1);
3556 if (ALIGNED_SYMBOL_REF_P (op0))
3557 {
3558 op0 = force_reg (Pmode, op0);
3559 mark_reg_pointer (op0, 128);
3560 }
3561 else if (GET_CODE (op0) != REG)
3562 op0 = force_reg (Pmode, op0);
3563 if (ALIGNED_SYMBOL_REF_P (op1))
3564 {
3565 op1 = force_reg (Pmode, op1);
3566 mark_reg_pointer (op1, 128);
3567 }
3568 else if (GET_CODE (op1) != REG)
3569 op1 = force_reg (Pmode, op1);
3570 x = gen_rtx_PLUS (Pmode, op0, op1);
3571 if (spu_legitimate_address (mode, x, 0))
3572 return x;
3573 }
3574 return NULL_RTX;
3575}
3576
3577/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3578 struct attribute_spec.handler. */
3579static tree
3580spu_handle_fndecl_attribute (tree * node,
3581 tree name,
3582 tree args ATTRIBUTE_UNUSED,
3583 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3584{
3585 if (TREE_CODE (*node) != FUNCTION_DECL)
3586 {
3587 warning (0, "`%s' attribute only applies to functions",
3588 IDENTIFIER_POINTER (name));
3589 *no_add_attrs = true;
3590 }
3591
3592 return NULL_TREE;
3593}
3594
3595/* Handle the "vector" attribute. */
3596static tree
3597spu_handle_vector_attribute (tree * node, tree name,
3598 tree args ATTRIBUTE_UNUSED,
3599 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3600{
3601 tree type = *node, result = NULL_TREE;
3602 enum machine_mode mode;
3603 int unsigned_p;
3604
3605 while (POINTER_TYPE_P (type)
3606 || TREE_CODE (type) == FUNCTION_TYPE
3607 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3608 type = TREE_TYPE (type);
3609
3610 mode = TYPE_MODE (type);
3611
3612 unsigned_p = TYPE_UNSIGNED (type);
3613 switch (mode)
3614 {
3615 case DImode:
3616 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3617 break;
3618 case SImode:
3619 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3620 break;
3621 case HImode:
3622 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3623 break;
3624 case QImode:
3625 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3626 break;
3627 case SFmode:
3628 result = V4SF_type_node;
3629 break;
3630 case DFmode:
3631 result = V2DF_type_node;
3632 break;
3633 default:
3634 break;
3635 }
3636
3637 /* Propagate qualifiers attached to the element type
3638 onto the vector type. */
3639 if (result && result != type && TYPE_QUALS (type))
3640 result = build_qualified_type (result, TYPE_QUALS (type));
3641
3642 *no_add_attrs = true; /* No need to hang on to the attribute. */
3643
3644 if (!result)
3645 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3646 else
d991e6e8 3647 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3648
3649 return NULL_TREE;
3650}
3651
f2b32076 3652/* Return nonzero if FUNC is a naked function. */
644459d0 3653static int
3654spu_naked_function_p (tree func)
3655{
3656 tree a;
3657
3658 if (TREE_CODE (func) != FUNCTION_DECL)
3659 abort ();
3660
3661 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3662 return a != NULL_TREE;
3663}
3664
3665int
3666spu_initial_elimination_offset (int from, int to)
3667{
3668 int saved_regs_size = spu_saved_regs_size ();
3669 int sp_offset = 0;
abe32cce 3670 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3671 || get_frame_size () || saved_regs_size)
3672 sp_offset = STACK_POINTER_OFFSET;
3673 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3674 return (sp_offset + crtl->outgoing_args_size);
644459d0 3675 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3676 return 0;
3677 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3678 return sp_offset + crtl->outgoing_args_size
644459d0 3679 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3680 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3681 return get_frame_size () + saved_regs_size + sp_offset;
3682 return 0;
3683}
3684
3685rtx
fb80456a 3686spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3687{
3688 enum machine_mode mode = TYPE_MODE (type);
3689 int byte_size = ((mode == BLKmode)
3690 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3691
3692 /* Make sure small structs are left justified in a register. */
3693 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3694 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3695 {
3696 enum machine_mode smode;
3697 rtvec v;
3698 int i;
3699 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3700 int n = byte_size / UNITS_PER_WORD;
3701 v = rtvec_alloc (nregs);
3702 for (i = 0; i < n; i++)
3703 {
3704 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3705 gen_rtx_REG (TImode,
3706 FIRST_RETURN_REGNUM
3707 + i),
3708 GEN_INT (UNITS_PER_WORD * i));
3709 byte_size -= UNITS_PER_WORD;
3710 }
3711
3712 if (n < nregs)
3713 {
3714 if (byte_size < 4)
3715 byte_size = 4;
3716 smode =
3717 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3718 RTVEC_ELT (v, n) =
3719 gen_rtx_EXPR_LIST (VOIDmode,
3720 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3721 GEN_INT (UNITS_PER_WORD * n));
3722 }
3723 return gen_rtx_PARALLEL (mode, v);
3724 }
3725 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3726}
3727
3728rtx
3729spu_function_arg (CUMULATIVE_ARGS cum,
3730 enum machine_mode mode,
3731 tree type, int named ATTRIBUTE_UNUSED)
3732{
3733 int byte_size;
3734
3735 if (cum >= MAX_REGISTER_ARGS)
3736 return 0;
3737
3738 byte_size = ((mode == BLKmode)
3739 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3740
3741 /* The ABI does not allow parameters to be passed partially in
 3742 	     a register and partially on the stack. */
3743 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3744 return 0;
3745
3746 /* Make sure small structs are left justified in a register. */
3747 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3748 && byte_size < UNITS_PER_WORD && byte_size > 0)
3749 {
3750 enum machine_mode smode;
3751 rtx gr_reg;
3752 if (byte_size < 4)
3753 byte_size = 4;
3754 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3755 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3756 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3757 const0_rtx);
3758 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3759 }
3760 else
3761 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3762}
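
/* Two illustrative cases for the function above (sizes chosen for
   illustration):  a 6 byte struct passed by value is widened to DImode
   and wrapped in a one-element PARALLEL at offset 0 so it stays left
   justified in its register; an argument needing 3 quadwords when only
   2 argument registers remain goes entirely to the stack, because the
   ABI forbids splitting it between registers and stack.  */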
3763
3764/* Variable sized types are passed by reference. */
3765static bool
3766spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3767 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3768 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3769{
3770 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3771}
3772\f
3773
3774/* Var args. */
3775
3776/* Create and return the va_list datatype.
3777
3778 On SPU, va_list is an array type equivalent to
3779
3780 typedef struct __va_list_tag
3781 {
3782 void *__args __attribute__((__aligned(16)));
3783 void *__skip __attribute__((__aligned(16)));
3784
3785 } va_list[1];
3786
fa7637bd 3787 where __args points to the arg that will be returned by the next
644459d0 3788 va_arg(), and __skip points to the previous stack frame such that
3789 when __args == __skip we should advance __args by 32 bytes. */
3790static tree
3791spu_build_builtin_va_list (void)
3792{
3793 tree f_args, f_skip, record, type_decl;
3794 bool owp;
3795
3796 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3797
3798 type_decl =
3799 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3800
3801 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3802 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3803
3804 DECL_FIELD_CONTEXT (f_args) = record;
3805 DECL_ALIGN (f_args) = 128;
3806 DECL_USER_ALIGN (f_args) = 1;
3807
3808 DECL_FIELD_CONTEXT (f_skip) = record;
3809 DECL_ALIGN (f_skip) = 128;
3810 DECL_USER_ALIGN (f_skip) = 1;
3811
3812 TREE_CHAIN (record) = type_decl;
3813 TYPE_NAME (record) = type_decl;
3814 TYPE_FIELDS (record) = f_args;
3815 TREE_CHAIN (f_args) = f_skip;
3816
 3817 /* We know this is being padded and we want it that way. It is an internal
3818 type so hide the warnings from the user. */
3819 owp = warn_padded;
3820 warn_padded = false;
3821
3822 layout_type (record);
3823
3824 warn_padded = owp;
3825
3826 /* The correct type is an array type of one element. */
3827 return build_array_type (record, build_index_type (size_zero_node));
3828}
3829
3830/* Implement va_start by filling the va_list structure VALIST.
3831 NEXTARG points to the first anonymous stack argument.
3832
3833 The following global variables are used to initialize
3834 the va_list structure:
3835
abe32cce 3836 crtl->args.info;
644459d0 3837 the CUMULATIVE_ARGS for this function
3838
abe32cce 3839 crtl->args.arg_offset_rtx:
644459d0 3840 holds the offset of the first anonymous stack argument
3841 (relative to the virtual arg pointer). */
3842
8a58ed0a 3843static void
644459d0 3844spu_va_start (tree valist, rtx nextarg)
3845{
3846 tree f_args, f_skip;
3847 tree args, skip, t;
3848
3849 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3850 f_skip = TREE_CHAIN (f_args);
3851
3852 valist = build_va_arg_indirect_ref (valist);
3853 args =
3854 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3855 skip =
3856 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3857
3858 /* Find the __args area. */
3859 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3860 if (crtl->args.pretend_args_size > 0)
0de36bdb 3861 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3862 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3863 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3864 TREE_SIDE_EFFECTS (t) = 1;
3865 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3866
3867 /* Find the __skip area. */
3868 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3869 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3870 size_int (crtl->args.pretend_args_size
0de36bdb 3871 - STACK_POINTER_OFFSET));
75a70cf9 3872 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3873 TREE_SIDE_EFFECTS (t) = 1;
3874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3875}
3876
3877/* Gimplify va_arg by updating the va_list structure
3878 VALIST as required to retrieve an argument of type
3879 TYPE, and returning that argument.
3880
3881 ret = va_arg(VALIST, TYPE);
3882
3883 generates code equivalent to:
3884
3885 paddedsize = (sizeof(TYPE) + 15) & -16;
3886 if (VALIST.__args + paddedsize > VALIST.__skip
3887 && VALIST.__args <= VALIST.__skip)
3888 addr = VALIST.__skip + 32;
3889 else
3890 addr = VALIST.__args;
3891 VALIST.__args = addr + paddedsize;
3892 ret = *(TYPE *)addr;
3893 */
3894static tree
75a70cf9 3895spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3896 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3897{
3898 tree f_args, f_skip;
3899 tree args, skip;
3900 HOST_WIDE_INT size, rsize;
3901 tree paddedsize, addr, tmp;
3902 bool pass_by_reference_p;
3903
3904 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3905 f_skip = TREE_CHAIN (f_args);
3906
3907 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3908 args =
3909 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3910 skip =
3911 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3912
3913 addr = create_tmp_var (ptr_type_node, "va_arg");
3914 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3915
3916 /* if an object is dynamically sized, a pointer to it is passed
3917 instead of the object itself. */
3918 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3919 false);
3920 if (pass_by_reference_p)
3921 type = build_pointer_type (type);
3922 size = int_size_in_bytes (type);
3923 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3924
3925 /* build conditional expression to calculate addr. The expression
3926 will be gimplified later. */
0de36bdb 3927 paddedsize = size_int (rsize);
75a70cf9 3928 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3929 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3930 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3931 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3932 unshare_expr (skip)));
644459d0 3933
3934 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3935 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3936 size_int (32)), unshare_expr (args));
644459d0 3937
75a70cf9 3938 gimplify_assign (addr, tmp, pre_p);
644459d0 3939
3940 /* update VALIST.__args */
0de36bdb 3941 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3942 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3943
3944 addr = fold_convert (build_pointer_type (type), addr);
3945
3946 if (pass_by_reference_p)
3947 addr = build_va_arg_indirect_ref (addr);
3948
3949 return build_va_arg_indirect_ref (addr);
3950}
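
/* A concrete walk-through of the pseudo code above, assuming the usual
   16 byte argument slots:  fetching an int (paddedsize = 16) when
   __args has caught up with __skip, i.e. __args == __skip, satisfies
   both tests (__args + 16 > __skip and __args <= __skip), so
   addr = __skip + 32, hopping over the 32 byte area at the start of
   the previous frame; __args then advances to addr + 16.  In the
   common case __args is still below __skip and the argument is simply
   read from *__args.  */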
3951
3952/* Save parameter registers starting with the register that corresponds
 3953 to the first unnamed parameter. If the first unnamed parameter is
 3954 on the stack then save no registers. Set pretend_args_size to the
3955 amount of space needed to save the registers. */
3956void
3957spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3958 tree type, int *pretend_size, int no_rtl)
3959{
3960 if (!no_rtl)
3961 {
3962 rtx tmp;
3963 int regno;
3964 int offset;
3965 int ncum = *cum;
3966
 3967 /* cum currently points to the last named argument; we want to
3968 start at the next argument. */
3969 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3970
3971 offset = -STACK_POINTER_OFFSET;
3972 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3973 {
3974 tmp = gen_frame_mem (V4SImode,
3975 plus_constant (virtual_incoming_args_rtx,
3976 offset));
3977 emit_move_insn (tmp,
3978 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3979 offset += 16;
3980 }
3981 *pretend_size = offset + STACK_POINTER_OFFSET;
3982 }
3983}
3984\f
3985void
3986spu_conditional_register_usage (void)
3987{
3988 if (flag_pic)
3989 {
3990 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3991 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3992 }
644459d0 3993}
3994
3995/* This is called to decide when we can simplify a load instruction. We
3996 must only return true for registers which we know will always be
 3997 aligned, taking into account that CSE might replace this reg with
 3998 another one that has not been marked aligned.
 3999 So this is really only true for frame, stack and virtual registers,
fa7637bd 4000 which we know are always aligned and should not be adversely affected
4001 by CSE. */
644459d0 4002static int
4003regno_aligned_for_load (int regno)
4004{
4005 return regno == FRAME_POINTER_REGNUM
5df189be 4006 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4007 || regno == ARG_POINTER_REGNUM
644459d0 4008 || regno == STACK_POINTER_REGNUM
5df189be 4009 || (regno >= FIRST_VIRTUAL_REGISTER
4010 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4011}
4012
4013/* Return TRUE when mem is known to be 16-byte aligned. */
4014int
4015aligned_mem_p (rtx mem)
4016{
4017 if (MEM_ALIGN (mem) >= 128)
4018 return 1;
4019 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4020 return 1;
4021 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4022 {
4023 rtx p0 = XEXP (XEXP (mem, 0), 0);
4024 rtx p1 = XEXP (XEXP (mem, 0), 1);
4025 if (regno_aligned_for_load (REGNO (p0)))
4026 {
4027 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4028 return 1;
4029 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4030 return 1;
4031 }
4032 }
4033 else if (GET_CODE (XEXP (mem, 0)) == REG)
4034 {
4035 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4036 return 1;
4037 }
4038 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4039 return 1;
4040 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4041 {
4042 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4043 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4044 if (GET_CODE (p0) == SYMBOL_REF
4045 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4046 return 1;
4047 }
4048 return 0;
4049}
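
/* Schematic examples of what the function above accepts and rejects
   (register names are only illustrative):

     (mem:TI ...)                   accepted - any 16 byte access
     (mem:SI (reg fp))              accepted - frame pointer is known aligned
     (mem:SI (plus (reg fp) 32))    accepted - aligned base, offset % 16 == 0
     (mem:SI (plus (reg fp) 12))    rejected - offset not a multiple of 16
     (mem:SI (reg r35))             rejected - ordinary pseudo, alignment
                                    unknown (CSE may have substituted it)  */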
4050
69ced2d6 4051/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4052 into its SYMBOL_REF_FLAGS. */
4053static void
4054spu_encode_section_info (tree decl, rtx rtl, int first)
4055{
4056 default_encode_section_info (decl, rtl, first);
4057
4058 /* If a variable has a forced alignment to < 16 bytes, mark it with
4059 SYMBOL_FLAG_ALIGN1. */
4060 if (TREE_CODE (decl) == VAR_DECL
4061 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4062 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4063}
4064
644459d0 4065/* Return TRUE if we are certain the mem refers to a complete object
4066 which is both 16-byte aligned and padded to a 16-byte boundary. This
4067 would make it safe to store with a single instruction.
4068 We guarantee the alignment and padding for static objects by aligning
4069 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4070 FIXME: We currently cannot guarantee this for objects on the stack
4071 because assign_parm_setup_stack calls assign_stack_local with the
4072 alignment of the parameter mode and in that case the alignment never
4073 gets adjusted by LOCAL_ALIGNMENT. */
4074static int
4075store_with_one_insn_p (rtx mem)
4076{
4077 rtx addr = XEXP (mem, 0);
4078 if (GET_MODE (mem) == BLKmode)
4079 return 0;
4080 /* Only static objects. */
4081 if (GET_CODE (addr) == SYMBOL_REF)
4082 {
4083 /* We use the associated declaration to make sure the access is
fa7637bd 4084 referring to the whole object.
644459d0 4085 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4086 if it is necessary. Will there be cases where one exists, and
4087 the other does not? Will there be cases where both exist, but
4088 have different types? */
4089 tree decl = MEM_EXPR (mem);
4090 if (decl
4091 && TREE_CODE (decl) == VAR_DECL
4092 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4093 return 1;
4094 decl = SYMBOL_REF_DECL (addr);
4095 if (decl
4096 && TREE_CODE (decl) == VAR_DECL
4097 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4098 return 1;
4099 }
4100 return 0;
4101}
4102
4103int
4104spu_expand_mov (rtx * ops, enum machine_mode mode)
4105{
4106 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4107 abort ();
4108
4109 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4110 {
4111 rtx from = SUBREG_REG (ops[1]);
4112 enum machine_mode imode = GET_MODE (from);
4113
4114 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4115 && GET_MODE_CLASS (imode) == MODE_INT
4116 && subreg_lowpart_p (ops[1]));
4117
4118 if (GET_MODE_SIZE (imode) < 4)
4119 {
4120 from = gen_rtx_SUBREG (SImode, from, 0);
4121 imode = SImode;
4122 }
4123
4124 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4125 {
99bdde56 4126 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4127 emit_insn (GEN_FCN (icode) (ops[0], from));
4128 }
4129 else
4130 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4131 return 1;
4132 }
4133
4134 /* At least one of the operands needs to be a register. */
4135 if ((reload_in_progress | reload_completed) == 0
4136 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4137 {
4138 rtx temp = force_reg (mode, ops[1]);
4139 emit_move_insn (ops[0], temp);
4140 return 1;
4141 }
4142 if (reload_in_progress || reload_completed)
4143 {
dea01258 4144 if (CONSTANT_P (ops[1]))
4145 return spu_split_immediate (ops);
644459d0 4146 return 0;
4147 }
4148 else
4149 {
4150 if (GET_CODE (ops[0]) == MEM)
4151 {
4152 if (!spu_valid_move (ops))
4153 {
4154 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4155 gen_reg_rtx (TImode)));
4156 return 1;
4157 }
4158 }
4159 else if (GET_CODE (ops[1]) == MEM)
4160 {
4161 if (!spu_valid_move (ops))
4162 {
4163 emit_insn (gen_load
4164 (ops[0], ops[1], gen_reg_rtx (TImode),
4165 gen_reg_rtx (SImode)));
4166 return 1;
4167 }
4168 }
4169 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4170 extend them. */
4171 if (GET_CODE (ops[1]) == CONST_INT)
4172 {
4173 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4174 if (val != INTVAL (ops[1]))
4175 {
4176 emit_move_insn (ops[0], GEN_INT (val));
4177 return 1;
4178 }
4179 }
4180 }
4181 return 0;
4182}
4183
644459d0 4184void
4185spu_split_load (rtx * ops)
4186{
4187 enum machine_mode mode = GET_MODE (ops[0]);
4188 rtx addr, load, rot, mem, p0, p1;
4189 int rot_amt;
4190
4191 addr = XEXP (ops[1], 0);
4192
4193 rot = 0;
4194 rot_amt = 0;
4195 if (GET_CODE (addr) == PLUS)
4196 {
4197 /* 8 cases:
4198 aligned reg + aligned reg => lqx
4199 aligned reg + unaligned reg => lqx, rotqby
4200 aligned reg + aligned const => lqd
4201 aligned reg + unaligned const => lqd, rotqbyi
4202 unaligned reg + aligned reg => lqx, rotqby
4203 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4204 unaligned reg + aligned const => lqd, rotqby
4205 unaligned reg + unaligned const -> not allowed by legitimate address
4206 */
4207 p0 = XEXP (addr, 0);
4208 p1 = XEXP (addr, 1);
aa71ecd4 4209 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4210 {
aa71ecd4 4211 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4212 {
4213 emit_insn (gen_addsi3 (ops[3], p0, p1));
4214 rot = ops[3];
4215 }
4216 else
4217 rot = p0;
4218 }
4219 else
4220 {
4221 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4222 {
4223 rot_amt = INTVAL (p1) & 15;
4224 p1 = GEN_INT (INTVAL (p1) & -16);
4225 addr = gen_rtx_PLUS (SImode, p0, p1);
4226 }
aa71ecd4 4227 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4228 rot = p1;
4229 }
4230 }
4231 else if (GET_CODE (addr) == REG)
4232 {
aa71ecd4 4233 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4234 rot = addr;
4235 }
4236 else if (GET_CODE (addr) == CONST)
4237 {
4238 if (GET_CODE (XEXP (addr, 0)) == PLUS
4239 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4240 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4241 {
4242 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4243 if (rot_amt & -16)
4244 addr = gen_rtx_CONST (Pmode,
4245 gen_rtx_PLUS (Pmode,
4246 XEXP (XEXP (addr, 0), 0),
4247 GEN_INT (rot_amt & -16)));
4248 else
4249 addr = XEXP (XEXP (addr, 0), 0);
4250 }
4251 else
4252 rot = addr;
4253 }
4254 else if (GET_CODE (addr) == CONST_INT)
4255 {
4256 rot_amt = INTVAL (addr);
4257 addr = GEN_INT (rot_amt & -16);
4258 }
4259 else if (!ALIGNED_SYMBOL_REF_P (addr))
4260 rot = addr;
4261
4262 if (GET_MODE_SIZE (mode) < 4)
4263 rot_amt += GET_MODE_SIZE (mode) - 4;
4264
4265 rot_amt &= 15;
4266
4267 if (rot && rot_amt)
4268 {
4269 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4270 rot = ops[3];
4271 rot_amt = 0;
4272 }
4273
4274 load = ops[2];
4275
4276 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4277 mem = change_address (ops[1], TImode, addr);
4278
e04cf423 4279 emit_insn (gen_movti (load, mem));
644459d0 4280
4281 if (rot)
4282 emit_insn (gen_rotqby_ti (load, load, rot));
4283 else if (rot_amt)
4284 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4285
4286 if (reload_completed)
4287 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4288 else
4289 emit_insn (gen_spu_convert (ops[0], load));
4290}
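
/* A small example of the splitting above (offset chosen for
   illustration):  an SImode load from (plus (reg fp) 5) falls in the
   "aligned reg + unaligned const" case.  rot_amt becomes 5, the offset
   is masked down to 0, a TImode load fetches the enclosing quadword,
   and a rotate left by rot_amt * 8 = 40 bits moves the wanted 4 bytes
   into the preferred slot before the final spu_convert.  */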
4291
4292void
4293spu_split_store (rtx * ops)
4294{
4295 enum machine_mode mode = GET_MODE (ops[0]);
4296 rtx pat = ops[2];
4297 rtx reg = ops[3];
4298 rtx addr, p0, p1, p1_lo, smem;
4299 int aform;
4300 int scalar;
4301
4302 addr = XEXP (ops[0], 0);
4303
4304 if (GET_CODE (addr) == PLUS)
4305 {
4306 /* 8 cases:
4307 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4308 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4309 aligned reg + aligned const => lqd, c?d, shuf, stqx
4310 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4311 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4312 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4313 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4314 unaligned reg + unaligned const -> not allowed by legitimate address
4315 */
4316 aform = 0;
4317 p0 = XEXP (addr, 0);
4318 p1 = p1_lo = XEXP (addr, 1);
4319 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4320 {
4321 p1_lo = GEN_INT (INTVAL (p1) & 15);
4322 p1 = GEN_INT (INTVAL (p1) & -16);
4323 addr = gen_rtx_PLUS (SImode, p0, p1);
4324 }
4325 }
4326 else if (GET_CODE (addr) == REG)
4327 {
4328 aform = 0;
4329 p0 = addr;
4330 p1 = p1_lo = const0_rtx;
4331 }
4332 else
4333 {
4334 aform = 1;
4335 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4336 p1 = 0; /* aform doesn't use p1 */
4337 p1_lo = addr;
4338 if (ALIGNED_SYMBOL_REF_P (addr))
4339 p1_lo = const0_rtx;
4340 else if (GET_CODE (addr) == CONST)
4341 {
4342 if (GET_CODE (XEXP (addr, 0)) == PLUS
4343 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4344 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4345 {
4346 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4347 if ((v & -16) != 0)
4348 addr = gen_rtx_CONST (Pmode,
4349 gen_rtx_PLUS (Pmode,
4350 XEXP (XEXP (addr, 0), 0),
4351 GEN_INT (v & -16)));
4352 else
4353 addr = XEXP (XEXP (addr, 0), 0);
4354 p1_lo = GEN_INT (v & 15);
4355 }
4356 }
4357 else if (GET_CODE (addr) == CONST_INT)
4358 {
4359 p1_lo = GEN_INT (INTVAL (addr) & 15);
4360 addr = GEN_INT (INTVAL (addr) & -16);
4361 }
4362 }
4363
e04cf423 4364 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4365
644459d0 4366 scalar = store_with_one_insn_p (ops[0]);
4367 if (!scalar)
4368 {
4369 /* We could copy the flags from the ops[0] MEM to mem here,
 4370 but we don't because we want this load to be optimized away if
4371 possible, and copying the flags will prevent that in certain
4372 cases, e.g. consider the volatile flag. */
4373
e04cf423 4374 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4375 set_mem_alias_set (lmem, 0);
4376 emit_insn (gen_movti (reg, lmem));
644459d0 4377
aa71ecd4 4378 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4379 p0 = stack_pointer_rtx;
4380 if (!p1_lo)
4381 p1_lo = const0_rtx;
4382
4383 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4384 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4385 }
4386 else if (reload_completed)
4387 {
4388 if (GET_CODE (ops[1]) == REG)
4389 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4390 else if (GET_CODE (ops[1]) == SUBREG)
4391 emit_move_insn (reg,
4392 gen_rtx_REG (GET_MODE (reg),
4393 REGNO (SUBREG_REG (ops[1]))));
4394 else
4395 abort ();
4396 }
4397 else
4398 {
4399 if (GET_CODE (ops[1]) == REG)
4400 emit_insn (gen_spu_convert (reg, ops[1]));
4401 else if (GET_CODE (ops[1]) == SUBREG)
4402 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4403 else
4404 abort ();
4405 }
4406
4407 if (GET_MODE_SIZE (mode) < 4 && scalar)
4408 emit_insn (gen_shlqby_ti
4409 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4410
644459d0 4411 smem = change_address (ops[0], TImode, addr);
4412 /* We can't use the previous alias set because the memory has changed
4413 size and can potentially overlap objects of other types. */
4414 set_mem_alias_set (smem, 0);
4415
e04cf423 4416 emit_insn (gen_movti (smem, reg));
644459d0 4417}
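
/* The non-scalar path above is the classic SPU read-modify-write
   sequence sketched in the 8-case comment:  load the enclosing
   quadword (lqd/lqx), build an insertion mask with gen_cpat
   (cbd/chd/cwd/cdd), merge the new value into the old quadword with
   shufb, and store the whole quadword back (stqd/stqx).  */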
4418
4419/* Return TRUE if X is MEM which is a struct member reference
4420 and the member can safely be loaded and stored with a single
4421 instruction because it is padded. */
4422static int
4423mem_is_padded_component_ref (rtx x)
4424{
4425 tree t = MEM_EXPR (x);
4426 tree r;
4427 if (!t || TREE_CODE (t) != COMPONENT_REF)
4428 return 0;
4429 t = TREE_OPERAND (t, 1);
4430 if (!t || TREE_CODE (t) != FIELD_DECL
4431 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4432 return 0;
4433 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4434 r = DECL_FIELD_CONTEXT (t);
4435 if (!r || TREE_CODE (r) != RECORD_TYPE)
4436 return 0;
4437 /* Make sure they are the same mode */
4438 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4439 return 0;
4440 /* If there are no following fields then the field alignment assures
fa7637bd 4441 the structure is padded to that alignment, which means this field is
4442 padded too. */
644459d0 4443 if (TREE_CHAIN (t) == 0)
4444 return 1;
4445 /* If the following field is also aligned then this field will be
4446 padded. */
4447 t = TREE_CHAIN (t);
4448 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4449 return 1;
4450 return 0;
4451}
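
/* For instance, given

     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };

   a store to s.a qualifies:  the following field is also 16 byte
   aligned, so a's slot is padded out to a full quadword and can be
   written with one instruction.  If b were not 16 byte aligned the
   test would fail for a, since a's padding could no longer be
   assumed.  */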
4452
c7b91b14 4453/* Parse the -mfixed-range= option string. */
4454static void
4455fix_range (const char *const_str)
4456{
4457 int i, first, last;
4458 char *str, *dash, *comma;
4459
4460 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4461 REG2 are either register names or register numbers. The effect
4462 of this option is to mark the registers in the range from REG1 to
4463 REG2 as ``fixed'' so they won't be used by the compiler. */
4464
4465 i = strlen (const_str);
4466 str = (char *) alloca (i + 1);
4467 memcpy (str, const_str, i + 1);
4468
4469 while (1)
4470 {
4471 dash = strchr (str, '-');
4472 if (!dash)
4473 {
4474 warning (0, "value of -mfixed-range must have form REG1-REG2");
4475 return;
4476 }
4477 *dash = '\0';
4478 comma = strchr (dash + 1, ',');
4479 if (comma)
4480 *comma = '\0';
4481
4482 first = decode_reg_name (str);
4483 if (first < 0)
4484 {
4485 warning (0, "unknown register name: %s", str);
4486 return;
4487 }
4488
4489 last = decode_reg_name (dash + 1);
4490 if (last < 0)
4491 {
4492 warning (0, "unknown register name: %s", dash + 1);
4493 return;
4494 }
4495
4496 *dash = '-';
4497
4498 if (first > last)
4499 {
4500 warning (0, "%s-%s is an empty range", str, dash + 1);
4501 return;
4502 }
4503
4504 for (i = first; i <= last; ++i)
4505 fixed_regs[i] = call_used_regs[i] = 1;
4506
4507 if (!comma)
4508 break;
4509
4510 *comma = ',';
4511 str = comma + 1;
4512 }
4513}
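
/* Example uses of the option parsed above:

     -mfixed-range=80-127          reserve registers 80 through 127
     -mfixed-range=80-85,90-99     two independent reserved ranges

   Register names accepted by decode_reg_name work as well; every
   register in a range is marked both fixed and call-used.  */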
4514
644459d0 4515int
4516spu_valid_move (rtx * ops)
4517{
4518 enum machine_mode mode = GET_MODE (ops[0]);
4519 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4520 return 0;
4521
4522 /* init_expr_once tries to recog against load and store insns to set
4523 the direct_load[] and direct_store[] arrays. We always want to
4524 consider those loads and stores valid. init_expr_once is called in
4525 the context of a dummy function which does not have a decl. */
4526 if (cfun->decl == 0)
4527 return 1;
4528
 4529 /* Don't allow loads/stores which would require more than 1 insn.
4530 During and after reload we assume loads and stores only take 1
4531 insn. */
4532 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4533 {
4534 if (GET_CODE (ops[0]) == MEM
4535 && (GET_MODE_SIZE (mode) < 4
4536 || !(store_with_one_insn_p (ops[0])
4537 || mem_is_padded_component_ref (ops[0]))))
4538 return 0;
4539 if (GET_CODE (ops[1]) == MEM
4540 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4541 return 0;
4542 }
4543 return 1;
4544}
4545
4546/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4547 can be generated using the fsmbi instruction. */
4548int
4549fsmbi_const_p (rtx x)
4550{
dea01258 4551 if (CONSTANT_P (x))
4552 {
5df189be 4553 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4554 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4555 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4556 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4557 }
4558 return 0;
4559}
4560
4561/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4562 can be generated using the cbd, chd, cwd or cdd instruction. */
4563int
4564cpat_const_p (rtx x, enum machine_mode mode)
4565{
4566 if (CONSTANT_P (x))
4567 {
4568 enum immediate_class c = classify_immediate (x, mode);
4569 return c == IC_CPAT;
4570 }
4571 return 0;
4572}
644459d0 4573
dea01258 4574rtx
4575gen_cpat_const (rtx * ops)
4576{
4577 unsigned char dst[16];
4578 int i, offset, shift, isize;
4579 if (GET_CODE (ops[3]) != CONST_INT
4580 || GET_CODE (ops[2]) != CONST_INT
4581 || (GET_CODE (ops[1]) != CONST_INT
4582 && GET_CODE (ops[1]) != REG))
4583 return 0;
4584 if (GET_CODE (ops[1]) == REG
4585 && (!REG_POINTER (ops[1])
4586 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4587 return 0;
644459d0 4588
4589 for (i = 0; i < 16; i++)
dea01258 4590 dst[i] = i + 16;
4591 isize = INTVAL (ops[3]);
4592 if (isize == 1)
4593 shift = 3;
4594 else if (isize == 2)
4595 shift = 2;
4596 else
4597 shift = 0;
4598 offset = (INTVAL (ops[2]) +
4599 (GET_CODE (ops[1]) ==
4600 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4601 for (i = 0; i < isize; i++)
4602 dst[offset + i] = i + shift;
4603 return array_to_constant (TImode, dst);
644459d0 4604}
4605
4606/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4607 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4608 than 16 bytes, the value is repeated across the rest of the array. */
4609void
4610constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4611{
4612 HOST_WIDE_INT val;
4613 int i, j, first;
4614
4615 memset (arr, 0, 16);
4616 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4617 if (GET_CODE (x) == CONST_INT
4618 || (GET_CODE (x) == CONST_DOUBLE
4619 && (mode == SFmode || mode == DFmode)))
4620 {
4621 gcc_assert (mode != VOIDmode && mode != BLKmode);
4622
4623 if (GET_CODE (x) == CONST_DOUBLE)
4624 val = const_double_to_hwint (x);
4625 else
4626 val = INTVAL (x);
4627 first = GET_MODE_SIZE (mode) - 1;
4628 for (i = first; i >= 0; i--)
4629 {
4630 arr[i] = val & 0xff;
4631 val >>= 8;
4632 }
4633 /* Splat the constant across the whole array. */
4634 for (j = 0, i = first + 1; i < 16; i++)
4635 {
4636 arr[i] = arr[j];
4637 j = (j == first) ? 0 : j + 1;
4638 }
4639 }
4640 else if (GET_CODE (x) == CONST_DOUBLE)
4641 {
4642 val = CONST_DOUBLE_LOW (x);
4643 for (i = 15; i >= 8; i--)
4644 {
4645 arr[i] = val & 0xff;
4646 val >>= 8;
4647 }
4648 val = CONST_DOUBLE_HIGH (x);
4649 for (i = 7; i >= 0; i--)
4650 {
4651 arr[i] = val & 0xff;
4652 val >>= 8;
4653 }
4654 }
4655 else if (GET_CODE (x) == CONST_VECTOR)
4656 {
4657 int units;
4658 rtx elt;
4659 mode = GET_MODE_INNER (mode);
4660 units = CONST_VECTOR_NUNITS (x);
4661 for (i = 0; i < units; i++)
4662 {
4663 elt = CONST_VECTOR_ELT (x, i);
4664 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4665 {
4666 if (GET_CODE (elt) == CONST_DOUBLE)
4667 val = const_double_to_hwint (elt);
4668 else
4669 val = INTVAL (elt);
4670 first = GET_MODE_SIZE (mode) - 1;
4671 if (first + i * GET_MODE_SIZE (mode) > 16)
4672 abort ();
4673 for (j = first; j >= 0; j--)
4674 {
4675 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4676 val >>= 8;
4677 }
4678 }
4679 }
4680 }
4681 else
4682 gcc_unreachable();
4683}
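
/* Byte layout example:  constant_to_array (SImode, GEN_INT (0x12345678), arr)
   first stores the big-endian bytes 12 34 56 78 and then splats them
   across the rest of the quadword:

     arr = { 12 34 56 78  12 34 56 78  12 34 56 78  12 34 56 78 }  (hex)  */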
4684
4685/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4686 smaller than 16 bytes, use the bytes that would represent that value
4687 in a register, e.g., for QImode return the value of arr[3]. */
4688rtx
4689array_to_constant (enum machine_mode mode, unsigned char arr[16])
4690{
4691 enum machine_mode inner_mode;
4692 rtvec v;
4693 int units, size, i, j, k;
4694 HOST_WIDE_INT val;
4695
4696 if (GET_MODE_CLASS (mode) == MODE_INT
4697 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4698 {
4699 j = GET_MODE_SIZE (mode);
4700 i = j < 4 ? 4 - j : 0;
4701 for (val = 0; i < j; i++)
4702 val = (val << 8) | arr[i];
4703 val = trunc_int_for_mode (val, mode);
4704 return GEN_INT (val);
4705 }
4706
4707 if (mode == TImode)
4708 {
4709 HOST_WIDE_INT high;
4710 for (i = high = 0; i < 8; i++)
4711 high = (high << 8) | arr[i];
4712 for (i = 8, val = 0; i < 16; i++)
4713 val = (val << 8) | arr[i];
4714 return immed_double_const (val, high, TImode);
4715 }
4716 if (mode == SFmode)
4717 {
4718 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4719 val = trunc_int_for_mode (val, SImode);
171b6d22 4720 return hwint_to_const_double (SFmode, val);
644459d0 4721 }
4722 if (mode == DFmode)
4723 {
4724 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4725 val <<= 32;
4726 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4727 return hwint_to_const_double (DFmode, val);
644459d0 4728 }
4729
4730 if (!VECTOR_MODE_P (mode))
4731 abort ();
4732
4733 units = GET_MODE_NUNITS (mode);
4734 size = GET_MODE_UNIT_SIZE (mode);
4735 inner_mode = GET_MODE_INNER (mode);
4736 v = rtvec_alloc (units);
4737
4738 for (k = i = 0; i < units; ++i)
4739 {
4740 val = 0;
4741 for (j = 0; j < size; j++, k++)
4742 val = (val << 8) | arr[k];
4743
4744 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4745 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4746 else
4747 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4748 }
4749 if (k > 16)
4750 abort ();
4751
4752 return gen_rtx_CONST_VECTOR (mode, v);
4753}
4754
4755static void
4756reloc_diagnostic (rtx x)
4757{
4758 tree loc_decl, decl = 0;
4759 const char *msg;
4760 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4761 return;
4762
4763 if (GET_CODE (x) == SYMBOL_REF)
4764 decl = SYMBOL_REF_DECL (x);
4765 else if (GET_CODE (x) == CONST
4766 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4767 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4768
4769 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4770 if (decl && !DECL_P (decl))
4771 decl = 0;
4772
4773 /* We use last_assemble_variable_decl to get line information. It's
4774 not always going to be right and might not even be close, but will
4775 be right for the more common cases. */
5df189be 4776 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4777 loc_decl = decl;
4778 else
4779 loc_decl = last_assemble_variable_decl;
4780
4781 /* The decl could be a string constant. */
4782 if (decl && DECL_P (decl))
4783 msg = "%Jcreating run-time relocation for %qD";
4784 else
4785 msg = "creating run-time relocation";
4786
99369027 4787 if (TARGET_WARN_RELOC)
644459d0 4788 warning (0, msg, loc_decl, decl);
99369027 4789 else
4790 error (msg, loc_decl, decl);
644459d0 4791}
4792
4793/* Hook into assemble_integer so we can generate an error for run-time
4794 relocations. The SPU ABI disallows them. */
4795static bool
4796spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4797{
4798 /* By default run-time relocations aren't supported, but we allow them
 4799 in case users support them in their own run-time loader. And we provide
4800 a warning for those users that don't. */
4801 if ((GET_CODE (x) == SYMBOL_REF)
4802 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4803 reloc_diagnostic (x);
4804
4805 return default_assemble_integer (x, size, aligned_p);
4806}
4807
4808static void
4809spu_asm_globalize_label (FILE * file, const char *name)
4810{
4811 fputs ("\t.global\t", file);
4812 assemble_name (file, name);
4813 fputs ("\n", file);
4814}
4815
4816static bool
f529eb25 4817spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4818 bool speed ATTRIBUTE_UNUSED)
644459d0 4819{
4820 enum machine_mode mode = GET_MODE (x);
4821 int cost = COSTS_N_INSNS (2);
4822
4823 /* Folding to a CONST_VECTOR will use extra space but there might
4824 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4825 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4826 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4827 because this cost will only be compared against a single insn.
4828 if (code == CONST_VECTOR)
4829 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4830 */
4831
4832 /* Use defaults for float operations. Not accurate but good enough. */
4833 if (mode == DFmode)
4834 {
4835 *total = COSTS_N_INSNS (13);
4836 return true;
4837 }
4838 if (mode == SFmode)
4839 {
4840 *total = COSTS_N_INSNS (6);
4841 return true;
4842 }
4843 switch (code)
4844 {
4845 case CONST_INT:
4846 if (satisfies_constraint_K (x))
4847 *total = 0;
4848 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4849 *total = COSTS_N_INSNS (1);
4850 else
4851 *total = COSTS_N_INSNS (3);
4852 return true;
4853
4854 case CONST:
4855 *total = COSTS_N_INSNS (3);
4856 return true;
4857
4858 case LABEL_REF:
4859 case SYMBOL_REF:
4860 *total = COSTS_N_INSNS (0);
4861 return true;
4862
4863 case CONST_DOUBLE:
4864 *total = COSTS_N_INSNS (5);
4865 return true;
4866
4867 case FLOAT_EXTEND:
4868 case FLOAT_TRUNCATE:
4869 case FLOAT:
4870 case UNSIGNED_FLOAT:
4871 case FIX:
4872 case UNSIGNED_FIX:
4873 *total = COSTS_N_INSNS (7);
4874 return true;
4875
4876 case PLUS:
4877 if (mode == TImode)
4878 {
4879 *total = COSTS_N_INSNS (9);
4880 return true;
4881 }
4882 break;
4883
4884 case MULT:
4885 cost =
4886 GET_CODE (XEXP (x, 0)) ==
4887 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4888 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4889 {
4890 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4891 {
4892 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4893 cost = COSTS_N_INSNS (14);
4894 if ((val & 0xffff) == 0)
4895 cost = COSTS_N_INSNS (9);
4896 else if (val > 0 && val < 0x10000)
4897 cost = COSTS_N_INSNS (11);
4898 }
4899 }
4900 *total = cost;
4901 return true;
4902 case DIV:
4903 case UDIV:
4904 case MOD:
4905 case UMOD:
4906 *total = COSTS_N_INSNS (20);
4907 return true;
4908 case ROTATE:
4909 case ROTATERT:
4910 case ASHIFT:
4911 case ASHIFTRT:
4912 case LSHIFTRT:
4913 *total = COSTS_N_INSNS (4);
4914 return true;
4915 case UNSPEC:
4916 if (XINT (x, 1) == UNSPEC_CONVERT)
4917 *total = COSTS_N_INSNS (0);
4918 else
4919 *total = COSTS_N_INSNS (4);
4920 return true;
4921 }
4922 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4923 if (GET_MODE_CLASS (mode) == MODE_INT
4924 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4925 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4926 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4927 *total = cost;
4928 return true;
4929}
4930
1bd43494 4931static enum machine_mode
4932spu_unwind_word_mode (void)
644459d0 4933{
1bd43494 4934 return SImode;
644459d0 4935}
4936
4937/* Decide whether we can make a sibling call to a function. DECL is the
4938 declaration of the function being targeted by the call and EXP is the
4939 CALL_EXPR representing the call. */
4940static bool
4941spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4942{
4943 return decl && !TARGET_LARGE_MEM;
4944}
4945
4946/* We need to correctly update the back chain pointer and the Available
4947 Stack Size (which is in the second slot of the sp register.) */
4948void
4949spu_allocate_stack (rtx op0, rtx op1)
4950{
4951 HOST_WIDE_INT v;
4952 rtx chain = gen_reg_rtx (V4SImode);
4953 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4954 rtx sp = gen_reg_rtx (V4SImode);
4955 rtx splatted = gen_reg_rtx (V4SImode);
4956 rtx pat = gen_reg_rtx (TImode);
4957
4958 /* copy the back chain so we can save it back again. */
4959 emit_move_insn (chain, stack_bot);
4960
4961 op1 = force_reg (SImode, op1);
4962
4963 v = 0x1020300010203ll;
4964 emit_move_insn (pat, immed_double_const (v, v, TImode));
4965 emit_insn (gen_shufb (splatted, op1, op1, pat));
4966
4967 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4968 emit_insn (gen_subv4si3 (sp, sp, splatted));
4969
4970 if (flag_stack_check)
4971 {
4972 rtx avail = gen_reg_rtx(SImode);
4973 rtx result = gen_reg_rtx(SImode);
4974 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4975 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4976 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4977 }
4978
4979 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4980
4981 emit_move_insn (stack_bot, chain);
4982
4983 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4984}
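
/* In the function above, the shuffle constant 0x00010203 repeated four
   times splats the requested size (an SImode value in the preferred
   slot of op1) into every word of a V4SImode register, so a single
   subv4si3 decrements both the stack pointer (slot 0) and the
   Available Stack Size (slot 1) at once; the saved back chain is then
   written back to the new stack bottom.  */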
4985
4986void
4987spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4988{
4989 static unsigned char arr[16] =
4990 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4991 rtx temp = gen_reg_rtx (SImode);
4992 rtx temp2 = gen_reg_rtx (SImode);
4993 rtx temp3 = gen_reg_rtx (V4SImode);
4994 rtx temp4 = gen_reg_rtx (V4SImode);
4995 rtx pat = gen_reg_rtx (TImode);
4996 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4997
4998 /* Restore the backchain from the first word, sp from the second. */
4999 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5000 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5001
5002 emit_move_insn (pat, array_to_constant (TImode, arr));
5003
5004 /* Compute Available Stack Size for sp */
5005 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5006 emit_insn (gen_shufb (temp3, temp, temp, pat));
5007
5008 /* Compute Available Stack Size for back chain */
5009 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5010 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5011 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5012
5013 emit_insn (gen_addv4si3 (sp, sp, temp3));
5014 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5015}
5016
5017static void
5018spu_init_libfuncs (void)
5019{
5020 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5021 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5022 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5023 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5024 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5025 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5026 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5027 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5028 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5029 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5030 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5031
5032 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5033 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5034
5035 set_optab_libfunc (smul_optab, TImode, "__multi3");
5036 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5037 set_optab_libfunc (smod_optab, TImode, "__modti3");
5038 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5039 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5040 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5041}
5042
5043/* Make a subreg, stripping any existing subreg. We could possibly just
5044 call simplify_subreg, but in this case we know what we want. */
5045rtx
5046spu_gen_subreg (enum machine_mode mode, rtx x)
5047{
5048 if (GET_CODE (x) == SUBREG)
5049 x = SUBREG_REG (x);
5050 if (GET_MODE (x) == mode)
5051 return x;
5052 return gen_rtx_SUBREG (mode, x, 0);
5053}
5054
5055static bool
fb80456a 5056spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5057{
5058 return (TYPE_MODE (type) == BLKmode
5059 && ((type) == 0
5060 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5061 || int_size_in_bytes (type) >
5062 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5063}
5064\f
5065/* Create the built-in types and functions */
5066
5067struct spu_builtin_description spu_builtins[] = {
5068#define DEF_BUILTIN(fcode, icode, name, type, params) \
5069 {fcode, icode, name, type, params, NULL_TREE},
5070#include "spu-builtins.def"
5071#undef DEF_BUILTIN
5072};
5073
5074static void
5075spu_init_builtins (void)
5076{
5077 struct spu_builtin_description *d;
5078 unsigned int i;
5079
5080 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5081 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5082 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5083 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5084 V4SF_type_node = build_vector_type (float_type_node, 4);
5085 V2DF_type_node = build_vector_type (double_type_node, 2);
5086
5087 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5088 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5089 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5090 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5091
c4ecce0c 5092 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5093
5094 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5095 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5096 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5097 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5098 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5101 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5102 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5103 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5104 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5105 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5106
5107 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5108 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5109 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5110 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5111 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5112 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5113 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5114 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5115
5116 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5117 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5118
5119 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5120
5121 spu_builtin_types[SPU_BTI_PTR] =
5122 build_pointer_type (build_qualified_type
5123 (void_type_node,
5124 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5125
5126 /* For each builtin we build a new prototype. The tree code will make
5127 sure nodes are shared. */
5128 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5129 {
5130 tree p;
5131 char name[64]; /* build_function will make a copy. */
5132 int parm;
5133
5134 if (d->name == 0)
5135 continue;
5136
5dfbd18f 5137 /* Find last parm. */
644459d0 5138 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5139 ;
644459d0 5140
5141 p = void_list_node;
5142 while (parm > 1)
5143 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5144
5145 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5146
5147 sprintf (name, "__builtin_%s", d->name);
5148 d->fndecl =
5149 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5150 NULL, NULL_TREE);
a76866d3 5151 if (d->fcode == SPU_MASK_FOR_LOAD)
5152 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5153
5154 /* These builtins don't throw. */
5155 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5156 }
5157}
5158
cf31d486 5159void
5160spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5161{
5162 static unsigned char arr[16] =
5163 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5164
5165 rtx temp = gen_reg_rtx (Pmode);
5166 rtx temp2 = gen_reg_rtx (V4SImode);
5167 rtx temp3 = gen_reg_rtx (V4SImode);
5168 rtx pat = gen_reg_rtx (TImode);
5169 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5170
5171 emit_move_insn (pat, array_to_constant (TImode, arr));
5172
5173 /* Restore the sp. */
5174 emit_move_insn (temp, op1);
5175 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5176
5177 /* Compute available stack size for sp. */
5178 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5179 emit_insn (gen_shufb (temp3, temp, temp, pat));
5180
5181 emit_insn (gen_addv4si3 (sp, sp, temp3));
5182 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5183}
5184
644459d0 5185int
5186spu_safe_dma (HOST_WIDE_INT channel)
5187{
006e4b96 5188 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5189}
5190
5191void
5192spu_builtin_splats (rtx ops[])
5193{
5194 enum machine_mode mode = GET_MODE (ops[0]);
5195 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5196 {
5197 unsigned char arr[16];
5198 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5199 emit_move_insn (ops[0], array_to_constant (mode, arr));
5200 }
644459d0 5201 else
5202 {
5203 rtx reg = gen_reg_rtx (TImode);
5204 rtx shuf;
5205 if (GET_CODE (ops[1]) != REG
5206 && GET_CODE (ops[1]) != SUBREG)
5207 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5208 switch (mode)
5209 {
5210 case V2DImode:
5211 case V2DFmode:
5212 shuf =
5213 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5214 TImode);
5215 break;
5216 case V4SImode:
5217 case V4SFmode:
5218 shuf =
5219 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5220 TImode);
5221 break;
5222 case V8HImode:
5223 shuf =
5224 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5225 TImode);
5226 break;
5227 case V16QImode:
5228 shuf =
5229 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5230 TImode);
5231 break;
5232 default:
5233 abort ();
5234 }
5235 emit_move_insn (reg, shuf);
5236 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5237 }
5238}
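
/* The shuffle constants above encode which source bytes land in each
   result byte.  For V4SImode, 0x00010203 repeated four times tells
   shufb to copy bytes 0-3 of the scalar operand (its preferred slot)
   into every word of the result, i.e. a four-way splat; the V8HImode
   and V16QImode patterns do the same with bytes 2-3 and byte 3
   respectively.  */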
5239
5240void
5241spu_builtin_extract (rtx ops[])
5242{
5243 enum machine_mode mode;
5244 rtx rot, from, tmp;
5245
5246 mode = GET_MODE (ops[1]);
5247
5248 if (GET_CODE (ops[2]) == CONST_INT)
5249 {
5250 switch (mode)
5251 {
5252 case V16QImode:
5253 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5254 break;
5255 case V8HImode:
5256 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5257 break;
5258 case V4SFmode:
5259 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5260 break;
5261 case V4SImode:
5262 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5263 break;
5264 case V2DImode:
5265 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5266 break;
5267 case V2DFmode:
5268 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5269 break;
5270 default:
5271 abort ();
5272 }
5273 return;
5274 }
5275
5276 from = spu_gen_subreg (TImode, ops[1]);
5277 rot = gen_reg_rtx (TImode);
5278 tmp = gen_reg_rtx (SImode);
5279
5280 switch (mode)
5281 {
5282 case V16QImode:
5283 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5284 break;
5285 case V8HImode:
5286 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5287 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5288 break;
5289 case V4SFmode:
5290 case V4SImode:
5291 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5292 break;
5293 case V2DImode:
5294 case V2DFmode:
5295 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5296 break;
5297 default:
5298 abort ();
5299 }
5300 emit_insn (gen_rotqby_ti (rot, from, tmp));
5301
5302 emit_insn (gen_spu_convert (ops[0], rot));
5303}
5304
5305void
5306spu_builtin_insert (rtx ops[])
5307{
5308 enum machine_mode mode = GET_MODE (ops[0]);
5309 enum machine_mode imode = GET_MODE_INNER (mode);
5310 rtx mask = gen_reg_rtx (TImode);
5311 rtx offset;
5312
5313 if (GET_CODE (ops[3]) == CONST_INT)
5314 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5315 else
5316 {
5317 offset = gen_reg_rtx (SImode);
5318 emit_insn (gen_mulsi3
5319 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5320 }
5321 emit_insn (gen_cpat
5322 (mask, stack_pointer_rtx, offset,
5323 GEN_INT (GET_MODE_SIZE (imode))));
5324 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5325}
5326
5327void
5328spu_builtin_promote (rtx ops[])
5329{
5330 enum machine_mode mode, imode;
5331 rtx rot, from, offset;
5332 HOST_WIDE_INT pos;
5333
5334 mode = GET_MODE (ops[0]);
5335 imode = GET_MODE_INNER (mode);
5336
5337 from = gen_reg_rtx (TImode);
5338 rot = spu_gen_subreg (TImode, ops[0]);
5339
5340 emit_insn (gen_spu_convert (from, ops[1]));
5341
5342 if (GET_CODE (ops[2]) == CONST_INT)
5343 {
5344 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5345 if (GET_MODE_SIZE (imode) < 4)
5346 pos += 4 - GET_MODE_SIZE (imode);
5347 offset = GEN_INT (pos & 15);
5348 }
5349 else
5350 {
5351 offset = gen_reg_rtx (SImode);
5352 switch (mode)
5353 {
5354 case V16QImode:
5355 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5356 break;
5357 case V8HImode:
5358 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5359 emit_insn (gen_addsi3 (offset, offset, offset));
5360 break;
5361 case V4SFmode:
5362 case V4SImode:
5363 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5364 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5365 break;
5366 case V2DImode:
5367 case V2DFmode:
5368 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5369 break;
5370 default:
5371 abort ();
5372 }
5373 }
5374 emit_insn (gen_rotqby_ti (rot, from, offset));
5375}
5376
5377void
5378spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5379{
5380 rtx shuf = gen_reg_rtx (V4SImode);
5381 rtx insn = gen_reg_rtx (V4SImode);
5382 rtx shufc;
5383 rtx insnc;
5384 rtx mem;
5385
5386 fnaddr = force_reg (SImode, fnaddr);
5387 cxt = force_reg (SImode, cxt);
5388
5389 if (TARGET_LARGE_MEM)
5390 {
5391 rtx rotl = gen_reg_rtx (V4SImode);
5392 rtx mask = gen_reg_rtx (V4SImode);
5393 rtx bi = gen_reg_rtx (SImode);
5394 unsigned char shufa[16] = {
5395 2, 3, 0, 1, 18, 19, 16, 17,
5396 0, 1, 2, 3, 16, 17, 18, 19
5397 };
5398 unsigned char insna[16] = {
5399 0x41, 0, 0, 79,
5400 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5401 0x60, 0x80, 0, 79,
5402 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5403 };
5404
5405 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5406 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5407
5408 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5409 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5410 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5411 emit_insn (gen_selb (insn, insnc, rotl, mask));
5412
5413 mem = memory_address (Pmode, tramp);
5414 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5415
5416 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5417 mem = memory_address (Pmode, plus_constant (tramp, 16));
5418 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5419 }
5420 else
5421 {
5422 rtx scxt = gen_reg_rtx (SImode);
5423 rtx sfnaddr = gen_reg_rtx (SImode);
5424 unsigned char insna[16] = {
5425 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5426 0x30, 0, 0, 0,
5427 0, 0, 0, 0,
5428 0, 0, 0, 0
5429 };
5430
5431 shufc = gen_reg_rtx (TImode);
5432 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5433
5434 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5435 fits 18 bits and the last 4 are zeros. This will be true if
 5436 the stack pointer is initialized to 0x3fff0 at program start;
5437 otherwise the ila instruction will be garbage. */
5438
5439 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5440 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5441 emit_insn (gen_cpat
5442 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5443 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5444 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5445
5446 mem = memory_address (Pmode, tramp);
5447 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5448
5449 }
5450 emit_insn (gen_sync ());
5451}
5452
5453void
5454spu_expand_sign_extend (rtx ops[])
5455{
5456 unsigned char arr[16];
5457 rtx pat = gen_reg_rtx (TImode);
5458 rtx sign, c;
5459 int i, last;
5460 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5461 if (GET_MODE (ops[1]) == QImode)
5462 {
5463 sign = gen_reg_rtx (HImode);
5464 emit_insn (gen_extendqihi2 (sign, ops[1]));
5465 for (i = 0; i < 16; i++)
5466 arr[i] = 0x12;
5467 arr[last] = 0x13;
5468 }
5469 else
5470 {
5471 for (i = 0; i < 16; i++)
5472 arr[i] = 0x10;
5473 switch (GET_MODE (ops[1]))
5474 {
5475 case HImode:
5476 sign = gen_reg_rtx (SImode);
5477 emit_insn (gen_extendhisi2 (sign, ops[1]));
5478 arr[last] = 0x03;
5479 arr[last - 1] = 0x02;
5480 break;
5481 case SImode:
5482 sign = gen_reg_rtx (SImode);
5483 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5484 for (i = 0; i < 4; i++)
5485 arr[last - i] = 3 - i;
5486 break;
5487 case DImode:
5488 sign = gen_reg_rtx (SImode);
5489 c = gen_reg_rtx (SImode);
5490 emit_insn (gen_spu_convert (c, ops[1]));
5491 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5492 for (i = 0; i < 8; i++)
5493 arr[last - i] = 7 - i;
5494 break;
5495 default:
5496 abort ();
5497 }
5498 }
5499 emit_move_insn (pat, array_to_constant (TImode, arr));
5500 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5501}
5502
 5503/* Expand vector initialization. If there are any constant parts,
5504 load constant parts first. Then load any non-constant parts. */
5505void
5506spu_expand_vector_init (rtx target, rtx vals)
5507{
5508 enum machine_mode mode = GET_MODE (target);
5509 int n_elts = GET_MODE_NUNITS (mode);
5510 int n_var = 0;
5511 bool all_same = true;
790c536c 5512 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5513 int i;
5514
5515 first = XVECEXP (vals, 0, 0);
5516 for (i = 0; i < n_elts; ++i)
5517 {
5518 x = XVECEXP (vals, 0, i);
e442af0b 5519 if (!(CONST_INT_P (x)
5520 || GET_CODE (x) == CONST_DOUBLE
5521 || GET_CODE (x) == CONST_FIXED))
644459d0 5522 ++n_var;
5523 else
5524 {
5525 if (first_constant == NULL_RTX)
5526 first_constant = x;
5527 }
5528 if (i > 0 && !rtx_equal_p (x, first))
5529 all_same = false;
5530 }
5531
5532 /* if all elements are the same, use splats to repeat elements */
5533 if (all_same)
5534 {
5535 if (!CONSTANT_P (first)
5536 && !register_operand (first, GET_MODE (x)))
5537 first = force_reg (GET_MODE (first), first);
5538 emit_insn (gen_spu_splats (target, first));
5539 return;
5540 }
5541
5542 /* load constant parts */
5543 if (n_var != n_elts)
5544 {
5545 if (n_var == 0)
5546 {
5547 emit_move_insn (target,
5548 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5549 }
5550 else
5551 {
5552 rtx constant_parts_rtx = copy_rtx (vals);
5553
5554 gcc_assert (first_constant != NULL_RTX);
5555 /* fill empty slots with the first constant, this increases
5556 our chance of using splats in the recursive call below. */
5557 for (i = 0; i < n_elts; ++i)
e442af0b 5558 {
5559 x = XVECEXP (constant_parts_rtx, 0, i);
5560 if (!(CONST_INT_P (x)
5561 || GET_CODE (x) == CONST_DOUBLE
5562 || GET_CODE (x) == CONST_FIXED))
5563 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5564 }
644459d0 5565
5566 spu_expand_vector_init (target, constant_parts_rtx);
5567 }
5568 }
5569
5570 /* load variable parts */
5571 if (n_var != 0)
5572 {
5573 rtx insert_operands[4];
5574
5575 insert_operands[0] = target;
5576 insert_operands[2] = target;
5577 for (i = 0; i < n_elts; ++i)
5578 {
5579 x = XVECEXP (vals, 0, i);
e442af0b 5580 if (!(CONST_INT_P (x)
5581 || GET_CODE (x) == CONST_DOUBLE
5582 || GET_CODE (x) == CONST_FIXED))
644459d0 5583 {
5584 if (!register_operand (x, GET_MODE (x)))
5585 x = force_reg (GET_MODE (x), x);
5586 insert_operands[1] = x;
5587 insert_operands[3] = GEN_INT (i);
5588 spu_builtin_insert (insert_operands);
5589 }
5590 }
5591 }
5592}
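/* Purely for illustration:  a V4SImode initializer such as { 1, 2, x, 4 },
   where x is not a constant, is first materialized as the constant vector
   { 1, 2, 1, 4 } (the variable slot padded with the first constant by the
   recursive call above), and then the run-time value x is inserted into
   element 2 with spu_builtin_insert.  */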
6352eedf 5593
5474166e 5594/* Return the insn index of the vector compare instruction for the given
5595 CODE, DEST_MODE and OP_MODE.  Return -1 if a valid insn is not available.  */
5596
5597static int
5598get_vec_cmp_insn (enum rtx_code code,
5599 enum machine_mode dest_mode,
5600 enum machine_mode op_mode)
5601
5602{
5603 switch (code)
5604 {
5605 case EQ:
5606 if (dest_mode == V16QImode && op_mode == V16QImode)
5607 return CODE_FOR_ceq_v16qi;
5608 if (dest_mode == V8HImode && op_mode == V8HImode)
5609 return CODE_FOR_ceq_v8hi;
5610 if (dest_mode == V4SImode && op_mode == V4SImode)
5611 return CODE_FOR_ceq_v4si;
5612 if (dest_mode == V4SImode && op_mode == V4SFmode)
5613 return CODE_FOR_ceq_v4sf;
5614 if (dest_mode == V2DImode && op_mode == V2DFmode)
5615 return CODE_FOR_ceq_v2df;
5616 break;
5617 case GT:
5618 if (dest_mode == V16QImode && op_mode == V16QImode)
5619 return CODE_FOR_cgt_v16qi;
5620 if (dest_mode == V8HImode && op_mode == V8HImode)
5621 return CODE_FOR_cgt_v8hi;
5622 if (dest_mode == V4SImode && op_mode == V4SImode)
5623 return CODE_FOR_cgt_v4si;
5624 if (dest_mode == V4SImode && op_mode == V4SFmode)
5625 return CODE_FOR_cgt_v4sf;
5626 if (dest_mode == V2DImode && op_mode == V2DFmode)
5627 return CODE_FOR_cgt_v2df;
5628 break;
5629 case GTU:
5630 if (dest_mode == V16QImode && op_mode == V16QImode)
5631 return CODE_FOR_clgt_v16qi;
5632 if (dest_mode == V8HImode && op_mode == V8HImode)
5633 return CODE_FOR_clgt_v8hi;
5634 if (dest_mode == V4SImode && op_mode == V4SImode)
5635 return CODE_FOR_clgt_v4si;
5636 break;
5637 default:
5638 break;
5639 }
5640 return -1;
5641}
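/* For example, an EQ comparison of two V4SFmode operands maps to
   CODE_FOR_ceq_v4sf and produces a V4SImode mask.  Codes such as LE have no
   entry here and return -1; spu_emit_vector_compare below synthesizes them
   from the available GT/GTU/EQ patterns.  */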
5642
5643/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5644 DMODE is expected destination mode. This is a recursive function. */
5645
5646static rtx
5647spu_emit_vector_compare (enum rtx_code rcode,
5648 rtx op0, rtx op1,
5649 enum machine_mode dmode)
5650{
5651 int vec_cmp_insn;
5652 rtx mask;
5653 enum machine_mode dest_mode;
5654 enum machine_mode op_mode = GET_MODE (op1);
5655
5656 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5657
5658 /* Floating point vector compare instructions use destination V4SImode.
5659 Double floating point vector compare instructions use destination V2DImode.
5660 Move the destination to the appropriate mode later.  */
5661 if (dmode == V4SFmode)
5662 dest_mode = V4SImode;
5663 else if (dmode == V2DFmode)
5664 dest_mode = V2DImode;
5665 else
5666 dest_mode = dmode;
5667
5668 mask = gen_reg_rtx (dest_mode);
5669 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5670
5671 if (vec_cmp_insn == -1)
5672 {
5673 bool swap_operands = false;
5674 bool try_again = false;
5675 switch (rcode)
5676 {
5677 case LT:
5678 rcode = GT;
5679 swap_operands = true;
5680 try_again = true;
5681 break;
5682 case LTU:
5683 rcode = GTU;
5684 swap_operands = true;
5685 try_again = true;
5686 break;
5687 case NE:
5688 /* Treat A != B as ~(A==B). */
5689 {
5690 enum insn_code nor_code;
5691 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5692 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5693 gcc_assert (nor_code != CODE_FOR_nothing);
5694 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5695 if (dmode != dest_mode)
5696 {
5697 rtx temp = gen_reg_rtx (dest_mode);
5698 convert_move (temp, mask, 0);
5699 return temp;
5700 }
5701 return mask;
5702 }
5703 break;
5704 case GE:
5705 case GEU:
5706 case LE:
5707 case LEU:
5708 /* Try GT/GTU/LT/LTU ORed with EQ.  */
5709 {
5710 rtx c_rtx, eq_rtx;
5711 enum insn_code ior_code;
5712 enum rtx_code new_code;
5713
5714 switch (rcode)
5715 {
5716 case GE: new_code = GT; break;
5717 case GEU: new_code = GTU; break;
5718 case LE: new_code = LT; break;
5719 case LEU: new_code = LTU; break;
5720 default:
5721 gcc_unreachable ();
5722 }
5723
5724 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5725 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5726
99bdde56 5727 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5728 gcc_assert (ior_code != CODE_FOR_nothing);
5729 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5730 if (dmode != dest_mode)
5731 {
5732 rtx temp = gen_reg_rtx (dest_mode);
5733 convert_move (temp, mask, 0);
5734 return temp;
5735 }
5736 return mask;
5737 }
5738 break;
5739 default:
5740 gcc_unreachable ();
5741 }
5742
5743 /* You only get two chances. */
5744 if (try_again)
5745 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5746
5747 gcc_assert (vec_cmp_insn != -1);
5748
5749 if (swap_operands)
5750 {
5751 rtx tmp;
5752 tmp = op0;
5753 op0 = op1;
5754 op1 = tmp;
5755 }
5756 }
5757
5758 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5759 if (dmode != dest_mode)
5760 {
5761 rtx temp = gen_reg_rtx (dest_mode);
5762 convert_move (temp, mask, 0);
5763 return temp;
5764 }
5765 return mask;
5766}
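/* A worked example of the fallback path above:  LE is rewritten as
   (LT or EQ), and LT in turn has no direct pattern, so the recursive call
   flips it to GT with the operands swapped.  NE is computed as the one's
   complement of EQ.  In every case the mask itself is produced in an integer
   mode (V4SImode or V2DImode when the inputs are float vectors).  */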
5767
5768
5769/* Emit vector conditional expression.
5770 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5771 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5772
5773int
5774spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5775 rtx cond, rtx cc_op0, rtx cc_op1)
5776{
5777 enum machine_mode dest_mode = GET_MODE (dest);
5778 enum rtx_code rcode = GET_CODE (cond);
5779 rtx mask;
5780
5781 /* Get the vector mask for the given relational operation.  */
5782 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5783
5784 emit_insn (gen_selb (dest, op2, op1, mask));
5785
5786 return 1;
5787}
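/* Note the operand order of the selb above:  assuming selb takes its result
   bits from the second value operand wherever the mask bit is set, bits for
   which the comparison was true come from OP1 and the rest from OP2, which
   matches the VEC_COND_EXPR semantics cond ? op1 : op2.  */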
5788
6352eedf 5789static rtx
5790spu_force_reg (enum machine_mode mode, rtx op)
5791{
5792 rtx x, r;
5793 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5794 {
5795 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5796 || GET_MODE (op) == BLKmode)
5797 return force_reg (mode, convert_to_mode (mode, op, 0));
5798 abort ();
5799 }
5800
5801 r = force_reg (GET_MODE (op), op);
5802 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5803 {
5804 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5805 if (x)
5806 return x;
5807 }
5808
5809 x = gen_reg_rtx (mode);
5810 emit_insn (gen_spu_convert (x, r));
5811 return x;
5812}
5813
5814static void
5815spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5816{
5817 HOST_WIDE_INT v = 0;
5818 int lsbits;
5819 /* Check the range of immediate operands. */
5820 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5821 {
5822 int range = p - SPU_BTI_7;
5df189be 5823
5824 if (!CONSTANT_P (op))
6352eedf 5825 error ("%s expects an integer literal in the range [%d, %d].",
5826 d->name,
5827 spu_builtin_range[range].low, spu_builtin_range[range].high);
5828
5829 if (GET_CODE (op) == CONST
5830 && (GET_CODE (XEXP (op, 0)) == PLUS
5831 || GET_CODE (XEXP (op, 0)) == MINUS))
5832 {
5833 v = INTVAL (XEXP (XEXP (op, 0), 1));
5834 op = XEXP (XEXP (op, 0), 0);
5835 }
5836 else if (GET_CODE (op) == CONST_INT)
5837 v = INTVAL (op);
5df189be 5838 else if (GET_CODE (op) == CONST_VECTOR
5839 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5840 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5841
5842 /* The default for v is 0, which is valid in every range.  */
5843 if (v < spu_builtin_range[range].low
5844 || v > spu_builtin_range[range].high)
5845 error ("%s expects an integer literal in the range [%d, %d]. ("
5846 HOST_WIDE_INT_PRINT_DEC ")",
5847 d->name,
5848 spu_builtin_range[range].low, spu_builtin_range[range].high,
5849 v);
6352eedf 5850
5851 switch (p)
5852 {
5853 case SPU_BTI_S10_4:
5854 lsbits = 4;
5855 break;
5856 case SPU_BTI_U16_2:
5857 /* This is only used in lqa and stqa.  Even though the insns
5858 encode 16 bits of the address (all but the 2 least
5859 significant), only 14 bits are used because the address is
5860 masked to be 16-byte aligned.  */
5861 lsbits = 4;
5862 break;
5863 case SPU_BTI_S16_2:
5864 /* This is used for lqr and stqr. */
5865 lsbits = 2;
5866 break;
5867 default:
5868 lsbits = 0;
5869 }
5870
5871 if (GET_CODE (op) == LABEL_REF
5872 || (GET_CODE (op) == SYMBOL_REF
5873 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5874 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5875 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5876 d->name);
5877 }
5878}
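/* For illustration:  an out-of-range literal for an immediate operand
   produces the range error above, and for the lqr/stqr style operands
   (SPU_BTI_S16_2, where lsbits is 2) a value with either of its low two
   bits set produces the "least significant bits are ignored" warning.  */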
5879
5880
5881static void
5df189be 5882expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5883 rtx target, rtx ops[])
5884{
5885 enum insn_code icode = d->icode;
5df189be 5886 int i = 0, a;
6352eedf 5887
5888 /* Expand the arguments into rtl. */
5889
5890 if (d->parm[0] != SPU_BTI_VOID)
5891 ops[i++] = target;
5892
5df189be 5893 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5894 {
5df189be 5895 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5896 if (arg == 0)
5897 abort ();
5898 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5899 }
5900}
5901
5902static rtx
5903spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5904 tree exp, rtx target)
6352eedf 5905{
5906 rtx pat;
5907 rtx ops[8];
5908 enum insn_code icode = d->icode;
5909 enum machine_mode mode, tmode;
5910 int i, p;
5911 tree return_type;
5912
5913 /* Set up ops[] with values from the call expression.  */
5df189be 5914 expand_builtin_args (d, exp, target, ops);
6352eedf 5915
5916 /* Handle the target operand which must be operand 0. */
5917 i = 0;
5918 if (d->parm[0] != SPU_BTI_VOID)
5919 {
5920
5921 /* We prefer the mode specified for the match_operand; otherwise
5922 use the mode from the builtin function prototype.  */
5923 tmode = insn_data[d->icode].operand[0].mode;
5924 if (tmode == VOIDmode)
5925 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5926
5927 /* Try to use target because not using it can lead to extra copies,
5928 and when all of the registers are being used, extra copies lead
5929 to extra spills.  */
5930 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5931 ops[0] = target;
5932 else
5933 target = ops[0] = gen_reg_rtx (tmode);
5934
5935 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5936 abort ();
5937
5938 i++;
5939 }
5940
a76866d3 5941 if (d->fcode == SPU_MASK_FOR_LOAD)
5942 {
5943 enum machine_mode mode = insn_data[icode].operand[1].mode;
5944 tree arg;
5945 rtx addr, op, pat;
5946
5947 /* Get the address.  */
5df189be 5948 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5949 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5950 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5951 addr = memory_address (mode, op);
5952
5953 /* Negate the address.  */
5954 op = gen_reg_rtx (GET_MODE (addr));
5955 emit_insn (gen_rtx_SET (VOIDmode, op,
5956 gen_rtx_NEG (GET_MODE (addr), addr)));
5957 op = gen_rtx_MEM (mode, op);
5958
5959 pat = GEN_FCN (icode) (target, op);
5960 if (!pat)
5961 return 0;
5962 emit_insn (pat);
5963 return target;
5964 }
5965
6352eedf 5966 /* Ignore align_hint, but still expand its args in case they have
5967 side effects.  */
5968 if (icode == CODE_FOR_spu_align_hint)
5969 return 0;
5970
5971 /* Handle the rest of the operands. */
5972 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5973 {
5974 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5975 mode = insn_data[d->icode].operand[i].mode;
5976 else
5977 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5978
5979 /* The mode can be VOIDmode here for labels.  */
5980
5981 /* For specific intrinsics with an immediate operand, e.g.,
5982 si_ai(), we sometimes need to convert the scalar argument to a
5983 vector argument by splatting the scalar. */
5984 if (VECTOR_MODE_P (mode)
5985 && (GET_CODE (ops[i]) == CONST_INT
5986 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5987 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5988 {
5989 if (GET_CODE (ops[i]) == CONST_INT)
5990 ops[i] = spu_const (mode, INTVAL (ops[i]));
5991 else
5992 {
5993 rtx reg = gen_reg_rtx (mode);
5994 enum machine_mode imode = GET_MODE_INNER (mode);
5995 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5996 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5997 if (imode != GET_MODE (ops[i]))
5998 ops[i] = convert_to_mode (imode, ops[i],
5999 TYPE_UNSIGNED (spu_builtin_types
6000 [d->parm[i]]));
6001 emit_insn (gen_spu_splats (reg, ops[i]));
6002 ops[i] = reg;
6003 }
6004 }
6005
5df189be 6006 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6007
6352eedf 6008 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6009 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6010 }
6011
6012 switch (insn_data[icode].n_operands)
6013 {
6014 case 0:
6015 pat = GEN_FCN (icode) (0);
6016 break;
6017 case 1:
6018 pat = GEN_FCN (icode) (ops[0]);
6019 break;
6020 case 2:
6021 pat = GEN_FCN (icode) (ops[0], ops[1]);
6022 break;
6023 case 3:
6024 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6025 break;
6026 case 4:
6027 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6028 break;
6029 case 5:
6030 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6031 break;
6032 case 6:
6033 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6034 break;
6035 default:
6036 abort ();
6037 }
6038
6039 if (!pat)
6040 abort ();
6041
6042 if (d->type == B_CALL || d->type == B_BISLED)
6043 emit_call_insn (pat);
6044 else if (d->type == B_JUMP)
6045 {
6046 emit_jump_insn (pat);
6047 emit_barrier ();
6048 }
6049 else
6050 emit_insn (pat);
6051
6052 return_type = spu_builtin_types[d->parm[0]];
6053 if (d->parm[0] != SPU_BTI_VOID
6054 && GET_MODE (target) != TYPE_MODE (return_type))
6055 {
6056 /* target is the return value.  It should always have the mode of
6057 the builtin function prototype.  */
6058 target = spu_force_reg (TYPE_MODE (return_type), target);
6059 }
6060
6061 return target;
6062}
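/* A sketch of the scalar-to-vector handling above:  for an intrinsic such
   as si_ai () the immediate argument arrives as a CONST_INT while the insn
   pattern wants a vector operand, so the constant is splatted across the
   vector mode with spu_const; a non-constant scalar is instead copied to a
   register and splatted at run time with the spu_splats pattern.  */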
6063
6064rtx
6065spu_expand_builtin (tree exp,
6066 rtx target,
6067 rtx subtarget ATTRIBUTE_UNUSED,
6068 enum machine_mode mode ATTRIBUTE_UNUSED,
6069 int ignore ATTRIBUTE_UNUSED)
6070{
5df189be 6071 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6072 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6073 struct spu_builtin_description *d;
6074
6075 if (fcode < NUM_SPU_BUILTINS)
6076 {
6077 d = &spu_builtins[fcode];
6078
5df189be 6079 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6080 }
6081 abort ();
6082}
6083
e99f512d 6084/* Implement targetm.vectorize.builtin_mul_widen_even. */
6085static tree
6086spu_builtin_mul_widen_even (tree type)
6087{
e99f512d 6088 switch (TYPE_MODE (type))
6089 {
6090 case V8HImode:
6091 if (TYPE_UNSIGNED (type))
6092 return spu_builtins[SPU_MULE_0].fndecl;
6093 else
6094 return spu_builtins[SPU_MULE_1].fndecl;
6095 break;
6096 default:
6097 return NULL_TREE;
6098 }
6099}
6100
6101/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6102static tree
6103spu_builtin_mul_widen_odd (tree type)
6104{
6105 switch (TYPE_MODE (type))
6106 {
6107 case V8HImode:
6108 if (TYPE_UNSIGNED (type))
6109 return spu_builtins[SPU_MULO_1].fndecl;
6110 else
6111 return spu_builtins[SPU_MULO_0].fndecl;
6112 break;
6113 default:
6114 return NULL_TREE;
6115 }
6116}
6117
a76866d3 6118/* Implement targetm.vectorize.builtin_mask_for_load. */
6119static tree
6120spu_builtin_mask_for_load (void)
6121{
6122 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6123 gcc_assert (d);
6124 return d->fndecl;
6125}
5df189be 6126
a28df51d 6127/* Implement targetm.vectorize.builtin_vectorization_cost. */
6128static int
6129spu_builtin_vectorization_cost (bool runtime_test)
6130{
6131 /* If the branch of the runtime test is taken, i.e. the vectorized
6132 version is skipped, this incurs a misprediction cost (because the
6133 vectorized version is expected to be the fall-through).  So we subtract
becfaa62 6134 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6135 when the vectorized version is executed.  */
6136 if (runtime_test)
6137 return -19;
6138 else
6139 return 0;
6140}
6141
0e87db76 6142/* Return true iff a data reference of TYPE can reach vector alignment (16)
6143 after applying some number of iterations.  This routine does not determine
6144 how many iterations are required to reach the desired alignment.  */
6145
6146static bool
a9f1838b 6147spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6148{
6149 if (is_packed)
6150 return false;
6151
6152 /* All other types are naturally aligned. */
6153 return true;
6154}
6155
a0515226 6156/* Implement targetm.vectorize.builtin_vec_perm. */
6157tree
6158spu_builtin_vec_perm (tree type, tree *mask_element_type)
6159{
6160 struct spu_builtin_description *d;
6161
6162 *mask_element_type = unsigned_char_type_node;
6163
6164 switch (TYPE_MODE (type))
6165 {
6166 case V16QImode:
6167 if (TYPE_UNSIGNED (type))
6168 d = &spu_builtins[SPU_SHUFFLE_0];
6169 else
6170 d = &spu_builtins[SPU_SHUFFLE_1];
6171 break;
6172
6173 case V8HImode:
6174 if (TYPE_UNSIGNED (type))
6175 d = &spu_builtins[SPU_SHUFFLE_2];
6176 else
6177 d = &spu_builtins[SPU_SHUFFLE_3];
6178 break;
6179
6180 case V4SImode:
6181 if (TYPE_UNSIGNED (type))
6182 d = &spu_builtins[SPU_SHUFFLE_4];
6183 else
6184 d = &spu_builtins[SPU_SHUFFLE_5];
6185 break;
6186
6187 case V2DImode:
6188 if (TYPE_UNSIGNED (type))
6189 d = &spu_builtins[SPU_SHUFFLE_6];
6190 else
6191 d = &spu_builtins[SPU_SHUFFLE_7];
6192 break;
6193
6194 case V4SFmode:
6195 d = &spu_builtins[SPU_SHUFFLE_8];
6196 break;
6197
6198 case V2DFmode:
6199 d = &spu_builtins[SPU_SHUFFLE_9];
6200 break;
6201
6202 default:
6203 return NULL_TREE;
6204 }
6205
6206 gcc_assert (d);
6207 return d->fndecl;
6208}
6209
d52fd16a 6210/* Count the total number of instructions in each pipe and return the
6211 maximum, which is used as the Minimum Iteration Interval (MII)
6212 in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1.
6213 A value of -2 means the instruction can go in either pipe0 or pipe1.  */
6214static int
6215spu_sms_res_mii (struct ddg *g)
6216{
6217 int i;
6218 unsigned t[4] = {0, 0, 0, 0};
6219
6220 for (i = 0; i < g->num_nodes; i++)
6221 {
6222 rtx insn = g->nodes[i].insn;
6223 int p = get_pipe (insn) + 2;
6224
6225 assert (p >= 0);
6226 assert (p < 4);
6227
6228 t[p]++;
6229 if (dump_file && INSN_P (insn))
6230 fprintf (dump_file, "i%d %s %d %d\n",
6231 INSN_UID (insn),
6232 insn_data[INSN_CODE(insn)].name,
6233 p, t[p]);
6234 }
6235 if (dump_file)
6236 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6237
6238 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6239}
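/* A worked example of the formula above, reading t[0] as the count of insns
   that can go in either pipe and t[2]/t[3] as the pipe0/pipe1 counts:  with
   t = { 4, 0, 3, 2 } the result is MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2))
   = MAX (5, 3) = 5, i.e. the MII is limited by how the flexible
   instructions can be split across the two pipes.  */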
6240
6241
5df189be 6242void
6243spu_init_expanders (void)
6244{
6245 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6246 * frame_pointer_needed is true. We don't know that until we're
6247 * expanding the prologue. */
6248 if (cfun)
6249 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6250}
6251
6252static enum machine_mode
6253spu_libgcc_cmp_return_mode (void)
6254{
6255
6256/* For the SPU the word mode is TImode, so it is better to use SImode
6257 for compare returns.  */
6258 return SImode;
6259}
6260
6261static enum machine_mode
6262spu_libgcc_shift_count_mode (void)
6263{
6264/* For the SPU the word mode is TImode, so it is better to use SImode
6265 for shift counts.  */
6266 return SImode;
6267}
5a976006 6268
6269/* An early place to adjust some flags after GCC has finished processing
6270 * them. */
6271static void
6272asm_file_start (void)
6273{
6274 /* Variable tracking should be run after all optimizations which
6275 change order of insns. It also needs a valid CFG. */
6276 spu_flag_var_tracking = flag_var_tracking;
6277 flag_var_tracking = 0;
6278
6279 default_file_start ();
6280}
6281