gcc/config/spu/spu.c
75a70cf9 1/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
5a976006 57#include "sbitmap.h"
58#include "timevar.h"
59#include "df.h"
6352eedf 60
61/* Builtin types, data and prototypes. */
62struct spu_builtin_range
63{
64 int low, high;
65};
66
67static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80};
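/* As an illustration of how the table above is used: a builtin operand
   typed SPU_BTI_U7 is expected to be a literal in [0, 127], SPU_BTI_S10
   one in [-512, 511], and SPU_BTI_U16 one in [0, 65535]; anything outside
   its row's [low, high] range is meant to be rejected when the builtin is
   expanded.  */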
81
644459d0 82\f
83/* Target specific attribute specifications. */
84char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
85
86/* Prototypes and external defs. */
87static void spu_init_builtins (void);
88static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91static rtx get_pic_reg (void);
92static int need_to_save_reg (int regno, int saving);
93static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97static void emit_nop_for_insn (rtx insn);
98static bool insn_clobbers_hbr (rtx insn);
99static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 100 int distance, sbitmap blocks);
5474166e 101static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
644459d0 103static rtx get_branch_target (rtx branch);
644459d0 104static void spu_machine_dependent_reorg (void);
105static int spu_sched_issue_rate (void);
106static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108static int get_pipe (rtx insn);
644459d0 109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 110static void spu_sched_init_global (FILE *, int, int);
111static void spu_sched_init (FILE *, int, int);
112static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 113static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119static int spu_naked_function_p (tree func);
fb80456a 120static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
644459d0 122static tree spu_build_builtin_va_list (void);
8a58ed0a 123static void spu_va_start (tree, rtx);
75a70cf9 124static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 126static int regno_aligned_for_load (int regno);
127static int store_with_one_insn_p (rtx mem);
644459d0 128static int mem_is_padded_component_ref (rtx x);
129static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130static void spu_asm_globalize_label (FILE * file, const char *name);
131static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 132 int *total, bool speed);
644459d0 133static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134static void spu_init_libfuncs (void);
fb80456a 135static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 136static void fix_range (const char *);
69ced2d6 137static void spu_encode_section_info (tree, rtx, int);
e99f512d 138static tree spu_builtin_mul_widen_even (tree);
139static tree spu_builtin_mul_widen_odd (tree);
a76866d3 140static tree spu_builtin_mask_for_load (void);
a28df51d 141static int spu_builtin_vectorization_cost (bool);
a9f1838b 142static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 143static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 144static int spu_sms_res_mii (struct ddg *g);
5a976006 145static void asm_file_start (void);
644459d0 146
147extern const char *reg_names[];
148rtx spu_compare_op0, spu_compare_op1;
149
5474166e 150/* Which instruction set architecture to use. */
151int spu_arch;
152/* Which cpu are we tuning for. */
153int spu_tune;
154
5a976006 155/* The hardware requires 8 insns between a hint and the branch it
 156   affects.  This variable describes how many rtl instructions the
157 compiler needs to see before inserting a hint, and then the compiler
158 will insert enough nops to make it at least 8 insns. The default is
 159   for the compiler to allow up to 2 nops to be emitted.  The nops are
160 inserted in pairs, so we round down. */
161int spu_hint_dist = (8*4) - (2*4);
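/* Worked example with the defaults above: 8 insns * 4 bytes = 32 bytes
   between a hint and its branch, minus the 2 permitted nops * 4 bytes,
   gives spu_hint_dist = 24 bytes of real instructions that must be seen
   before a hint is inserted.  */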
162
163/* Determines whether we run variable tracking in machine dependent
164 reorganization. */
165static int spu_flag_var_tracking;
166
644459d0 167enum spu_immediate {
168 SPU_NONE,
169 SPU_IL,
170 SPU_ILA,
171 SPU_ILH,
172 SPU_ILHU,
173 SPU_ORI,
174 SPU_ORHI,
175 SPU_ORBI,
99369027 176 SPU_IOHL
644459d0 177};
dea01258 178enum immediate_class
179{
180 IC_POOL, /* constant pool */
181 IC_IL1, /* one il* instruction */
182 IC_IL2, /* both ilhu and iohl instructions */
183 IC_IL1s, /* one il* instruction */
184 IC_IL2s, /* both ilhu and iohl instructions */
185 IC_FSMBI, /* the fsmbi instruction */
186 IC_CPAT, /* one of the c*d instructions */
5df189be 187 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 188};
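/* Rough examples of the classification: a constant such as 0x1234 that a
   single il/ila/ilh/ilhu can materialize falls in IC_IL1; a full 32-bit
   value like 0x12345678 that needs ilhu followed by iohl is IC_IL2; a
   value whose bytes are all 0x00 or 0xff is IC_FSMBI; anything that no
   short sequence covers ends up in the constant pool (IC_POOL).  */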
644459d0 189
190static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
191static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 192static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
193static enum immediate_class classify_immediate (rtx op,
194 enum machine_mode mode);
644459d0 195
1bd43494 196static enum machine_mode spu_unwind_word_mode (void);
197
ea32e033 198static enum machine_mode
199spu_libgcc_cmp_return_mode (void);
200
201static enum machine_mode
202spu_libgcc_shift_count_mode (void);
203
644459d0 204/* Built in types. */
205tree spu_builtin_types[SPU_BTI_MAX];
206\f
207/* TARGET overrides. */
208
209#undef TARGET_INIT_BUILTINS
210#define TARGET_INIT_BUILTINS spu_init_builtins
211
644459d0 212#undef TARGET_EXPAND_BUILTIN
213#define TARGET_EXPAND_BUILTIN spu_expand_builtin
214
1bd43494 215#undef TARGET_UNWIND_WORD_MODE
216#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 217
218/* The .8byte directive doesn't seem to work well for a 32 bit
219 architecture. */
220#undef TARGET_ASM_UNALIGNED_DI_OP
221#define TARGET_ASM_UNALIGNED_DI_OP NULL
222
223#undef TARGET_RTX_COSTS
224#define TARGET_RTX_COSTS spu_rtx_costs
225
226#undef TARGET_ADDRESS_COST
f529eb25 227#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 228
229#undef TARGET_SCHED_ISSUE_RATE
230#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
231
5a976006 232#undef TARGET_SCHED_INIT_GLOBAL
233#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
234
235#undef TARGET_SCHED_INIT
236#define TARGET_SCHED_INIT spu_sched_init
237
644459d0 238#undef TARGET_SCHED_VARIABLE_ISSUE
239#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
240
5a976006 241#undef TARGET_SCHED_REORDER
242#define TARGET_SCHED_REORDER spu_sched_reorder
243
244#undef TARGET_SCHED_REORDER2
245#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 246
247#undef TARGET_SCHED_ADJUST_COST
248#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
249
250const struct attribute_spec spu_attribute_table[];
251#undef TARGET_ATTRIBUTE_TABLE
252#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
253
254#undef TARGET_ASM_INTEGER
255#define TARGET_ASM_INTEGER spu_assemble_integer
256
257#undef TARGET_SCALAR_MODE_SUPPORTED_P
258#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
259
260#undef TARGET_VECTOR_MODE_SUPPORTED_P
261#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
262
263#undef TARGET_FUNCTION_OK_FOR_SIBCALL
264#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
265
266#undef TARGET_ASM_GLOBALIZE_LABEL
267#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
268
269#undef TARGET_PASS_BY_REFERENCE
270#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
271
272#undef TARGET_MUST_PASS_IN_STACK
273#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
274
275#undef TARGET_BUILD_BUILTIN_VA_LIST
276#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
277
8a58ed0a 278#undef TARGET_EXPAND_BUILTIN_VA_START
279#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
280
644459d0 281#undef TARGET_SETUP_INCOMING_VARARGS
282#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
283
284#undef TARGET_MACHINE_DEPENDENT_REORG
285#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
286
287#undef TARGET_GIMPLIFY_VA_ARG_EXPR
288#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
289
290#undef TARGET_DEFAULT_TARGET_FLAGS
291#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
292
293#undef TARGET_INIT_LIBFUNCS
294#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
295
296#undef TARGET_RETURN_IN_MEMORY
297#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
298
69ced2d6 299#undef TARGET_ENCODE_SECTION_INFO
300#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
301
e99f512d 302#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
303#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
304
305#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
306#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
307
a76866d3 308#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
309#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
310
a28df51d 311#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
312#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
313
0e87db76 314#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
315#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
316
a0515226 317#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
318#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
319
ea32e033 320#undef TARGET_LIBGCC_CMP_RETURN_MODE
321#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
322
323#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
324#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
325
d52fd16a 326#undef TARGET_SCHED_SMS_RES_MII
327#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
328
5a976006 329#undef TARGET_ASM_FILE_START
330#define TARGET_ASM_FILE_START asm_file_start
331
644459d0 332struct gcc_target targetm = TARGET_INITIALIZER;
333
5df189be 334void
335spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
336{
5df189be 337 /* Override some of the default param values. With so many registers
338 larger values are better for these params. */
339 MAX_PENDING_LIST_LENGTH = 128;
340
341 /* With so many registers this is better on by default. */
342 flag_rename_registers = 1;
343}
344
644459d0 345/* Sometimes certain combinations of command options do not make sense
346 on a particular target machine. You can define a macro
347 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
348 executed once just after all the command options have been parsed. */
349void
350spu_override_options (void)
351{
14d408d9 352  /* Small loops will be completely peeled at -O3.  For SPU it is more important
353 to keep code small by default. */
354 if (!flag_unroll_loops && !flag_peel_loops
355 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
356 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
357
644459d0 358 flag_omit_frame_pointer = 1;
359
5a976006 360  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
644459d0 361 if (align_functions < 8)
362 align_functions = 8;
c7b91b14 363
5a976006 364 spu_hint_dist = 8*4 - spu_max_nops*4;
365 if (spu_hint_dist < 0)
366 spu_hint_dist = 0;
367
c7b91b14 368 if (spu_fixed_range_string)
369 fix_range (spu_fixed_range_string);
5474166e 370
371 /* Determine processor architectural level. */
372 if (spu_arch_string)
373 {
374 if (strcmp (&spu_arch_string[0], "cell") == 0)
375 spu_arch = PROCESSOR_CELL;
376 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
377 spu_arch = PROCESSOR_CELLEDP;
378 else
379 error ("Unknown architecture '%s'", &spu_arch_string[0]);
380 }
381
382 /* Determine processor to tune for. */
383 if (spu_tune_string)
384 {
385 if (strcmp (&spu_tune_string[0], "cell") == 0)
386 spu_tune = PROCESSOR_CELL;
387 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
388 spu_tune = PROCESSOR_CELLEDP;
389 else
 390       error ("Unknown tune option '%s'", &spu_tune_string[0]);
391 }
98bbec1e 392
13684256 393 /* Change defaults according to the processor architecture. */
394 if (spu_arch == PROCESSOR_CELLEDP)
395 {
396 /* If no command line option has been otherwise specified, change
397 the default to -mno-safe-hints on celledp -- only the original
398 Cell/B.E. processors require this workaround. */
399 if (!(target_flags_explicit & MASK_SAFE_HINTS))
400 target_flags &= ~MASK_SAFE_HINTS;
401 }
402
98bbec1e 403 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 404}
405\f
406/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
407 struct attribute_spec.handler. */
408
409/* Table of machine attributes. */
410const struct attribute_spec spu_attribute_table[] =
411{
412 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
413 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
414 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
415 { NULL, 0, 0, false, false, false, NULL }
416};
417
418/* True if MODE is valid for the target. By "valid", we mean able to
419 be manipulated in non-trivial ways. In particular, this means all
420 the arithmetic is supported. */
421static bool
422spu_scalar_mode_supported_p (enum machine_mode mode)
423{
424 switch (mode)
425 {
426 case QImode:
427 case HImode:
428 case SImode:
429 case SFmode:
430 case DImode:
431 case TImode:
432 case DFmode:
433 return true;
434
435 default:
436 return false;
437 }
438}
439
440/* Similarly for vector modes. "Supported" here is less strict. At
441 least some operations are supported; need to check optabs or builtins
442 for further details. */
443static bool
444spu_vector_mode_supported_p (enum machine_mode mode)
445{
446 switch (mode)
447 {
448 case V16QImode:
449 case V8HImode:
450 case V4SImode:
451 case V2DImode:
452 case V4SFmode:
453 case V2DFmode:
454 return true;
455
456 default:
457 return false;
458 }
459}
460
461/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
462 least significant bytes of the outer mode. This function returns
 463   TRUE for the SUBREGs where this is correct.  */
464int
465valid_subreg (rtx op)
466{
467 enum machine_mode om = GET_MODE (op);
468 enum machine_mode im = GET_MODE (SUBREG_REG (op));
469 return om != VOIDmode && im != VOIDmode
470 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 471 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
472 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 473}
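/* For example, (subreg:SI (reg:QI)) is accepted because both modes are at
   most 4 bytes wide, and (subreg:V4SI (reg:TI)) because both are 16 bytes;
   (subreg:DI (reg:SI)) is rejected since the sizes differ and fall in
   neither group.  */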
474
475/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 476 and adjust the start offset. */
644459d0 477static rtx
478adjust_operand (rtx op, HOST_WIDE_INT * start)
479{
480 enum machine_mode mode;
481 int op_size;
38aca5eb 482 /* Strip any paradoxical SUBREG. */
483 if (GET_CODE (op) == SUBREG
484 && (GET_MODE_BITSIZE (GET_MODE (op))
485 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 486 {
487 if (start)
488 *start -=
489 GET_MODE_BITSIZE (GET_MODE (op)) -
490 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
491 op = SUBREG_REG (op);
492 }
 493   /* If it is smaller than SI, ensure a SUBREG.  */
494 op_size = GET_MODE_BITSIZE (GET_MODE (op));
495 if (op_size < 32)
496 {
497 if (start)
498 *start += 32 - op_size;
499 op_size = 32;
500 }
501 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
502 mode = mode_for_size (op_size, MODE_INT, 0);
503 if (mode != GET_MODE (op))
504 op = gen_rtx_SUBREG (mode, op, 0);
505 return op;
506}
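/* A sketch of the effect: for (subreg:TI (reg:DI)) with *start == 70 the
   paradoxical SUBREG is stripped and *start becomes 70 - (128 - 64) = 6;
   for a bare QImode register *start grows by 24 and the register is
   rewrapped as (subreg:SI (reg:QI) 0).  */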
507
508void
509spu_expand_extv (rtx ops[], int unsignedp)
510{
511 HOST_WIDE_INT width = INTVAL (ops[2]);
512 HOST_WIDE_INT start = INTVAL (ops[3]);
513 HOST_WIDE_INT src_size, dst_size;
514 enum machine_mode src_mode, dst_mode;
515 rtx dst = ops[0], src = ops[1];
516 rtx s;
517
518 dst = adjust_operand (ops[0], 0);
519 dst_mode = GET_MODE (dst);
520 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
521
644459d0 522 src = adjust_operand (src, &start);
523 src_mode = GET_MODE (src);
524 src_size = GET_MODE_BITSIZE (GET_MODE (src));
525
526 if (start > 0)
527 {
528 s = gen_reg_rtx (src_mode);
529 switch (src_mode)
530 {
531 case SImode:
532 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
533 break;
534 case DImode:
535 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
536 break;
537 case TImode:
538 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
539 break;
540 default:
541 abort ();
542 }
543 src = s;
544 }
545
546 if (width < src_size)
547 {
548 rtx pat;
549 int icode;
550 switch (src_mode)
551 {
552 case SImode:
553 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
554 break;
555 case DImode:
556 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
557 break;
558 case TImode:
559 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
560 break;
561 default:
562 abort ();
563 }
564 s = gen_reg_rtx (src_mode);
565 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
566 emit_insn (pat);
567 src = s;
568 }
569
570 convert_move (dst, src, unsignedp);
571}
572
573void
574spu_expand_insv (rtx ops[])
575{
576 HOST_WIDE_INT width = INTVAL (ops[1]);
577 HOST_WIDE_INT start = INTVAL (ops[2]);
578 HOST_WIDE_INT maskbits;
579 enum machine_mode dst_mode, src_mode;
580 rtx dst = ops[0], src = ops[3];
581 int dst_size, src_size;
582 rtx mask;
583 rtx shift_reg;
584 int shift;
585
586
587 if (GET_CODE (ops[0]) == MEM)
588 dst = gen_reg_rtx (TImode);
589 else
590 dst = adjust_operand (dst, &start);
591 dst_mode = GET_MODE (dst);
592 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
593
594 if (CONSTANT_P (src))
595 {
596 enum machine_mode m =
597 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
598 src = force_reg (m, convert_to_mode (m, src, 0));
599 }
600 src = adjust_operand (src, 0);
601 src_mode = GET_MODE (src);
602 src_size = GET_MODE_BITSIZE (GET_MODE (src));
603
604 mask = gen_reg_rtx (dst_mode);
605 shift_reg = gen_reg_rtx (dst_mode);
606 shift = dst_size - start - width;
607
608 /* It's not safe to use subreg here because the compiler assumes
609 that the SUBREG_REG is right justified in the SUBREG. */
610 convert_move (shift_reg, src, 1);
611
612 if (shift > 0)
613 {
614 switch (dst_mode)
615 {
616 case SImode:
617 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
618 break;
619 case DImode:
620 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
621 break;
622 case TImode:
623 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
624 break;
625 default:
626 abort ();
627 }
628 }
629 else if (shift < 0)
630 abort ();
631
632 switch (dst_size)
633 {
634 case 32:
635 maskbits = (-1ll << (32 - width - start));
636 if (start)
637 maskbits += (1ll << (32 - start));
638 emit_move_insn (mask, GEN_INT (maskbits));
639 break;
640 case 64:
641 maskbits = (-1ll << (64 - width - start));
642 if (start)
643 maskbits += (1ll << (64 - start));
644 emit_move_insn (mask, GEN_INT (maskbits));
645 break;
646 case 128:
647 {
648 unsigned char arr[16];
649 int i = start / 8;
650 memset (arr, 0, sizeof (arr));
651 arr[i] = 0xff >> (start & 7);
652 for (i++; i <= (start + width - 1) / 8; i++)
653 arr[i] = 0xff;
654 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
655 emit_move_insn (mask, array_to_constant (TImode, arr));
656 }
657 break;
658 default:
659 abort ();
660 }
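  /* Worked example for the 32-bit case above: start == 8 and width == 8
     give maskbits = (-1ll << 16) + (1ll << 24) == 0x00ff0000, i.e. ones
     exactly over the inserted byte, with START counted from the most
     significant end.  */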
661 if (GET_CODE (ops[0]) == MEM)
662 {
663 rtx aligned = gen_reg_rtx (SImode);
664 rtx low = gen_reg_rtx (SImode);
665 rtx addr = gen_reg_rtx (SImode);
666 rtx rotl = gen_reg_rtx (SImode);
667 rtx mask0 = gen_reg_rtx (TImode);
668 rtx mem;
669
670 emit_move_insn (addr, XEXP (ops[0], 0));
671 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
672 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
673 emit_insn (gen_negsi2 (rotl, low));
674 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
675 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
676 mem = change_address (ops[0], TImode, aligned);
677 set_mem_alias_set (mem, 0);
678 emit_move_insn (dst, mem);
679 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
680 emit_move_insn (mem, dst);
681 if (start + width > MEM_ALIGN (ops[0]))
682 {
683 rtx shl = gen_reg_rtx (SImode);
684 rtx mask1 = gen_reg_rtx (TImode);
685 rtx dst1 = gen_reg_rtx (TImode);
686 rtx mem1;
687 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
688 emit_insn (gen_shlqby_ti (mask1, mask, shl));
689 mem1 = adjust_address (mem, TImode, 16);
690 set_mem_alias_set (mem1, 0);
691 emit_move_insn (dst1, mem1);
692 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
693 emit_move_insn (mem1, dst1);
694 }
695 }
696 else
71cd778d 697 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 698}
699
700
701int
702spu_expand_block_move (rtx ops[])
703{
704 HOST_WIDE_INT bytes, align, offset;
705 rtx src, dst, sreg, dreg, target;
706 int i;
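  /* As a rough illustration of the expansion below: a 20-byte copy with
     16-byte alignment becomes one 16-byte vector move for the first
     chunk, then a load of the next 16 destination bytes, a selb that
     replaces only their first 4 bytes with source data, and a store
     back.  */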
707 if (GET_CODE (ops[2]) != CONST_INT
708 || GET_CODE (ops[3]) != CONST_INT
48eb4342 709 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 710 return 0;
711
712 bytes = INTVAL (ops[2]);
713 align = INTVAL (ops[3]);
714
715 if (bytes <= 0)
716 return 1;
717
718 dst = ops[0];
719 src = ops[1];
720
721 if (align == 16)
722 {
723 for (offset = 0; offset + 16 <= bytes; offset += 16)
724 {
725 dst = adjust_address (ops[0], V16QImode, offset);
726 src = adjust_address (ops[1], V16QImode, offset);
727 emit_move_insn (dst, src);
728 }
729 if (offset < bytes)
730 {
731 rtx mask;
732 unsigned char arr[16] = { 0 };
733 for (i = 0; i < bytes - offset; i++)
734 arr[i] = 0xff;
735 dst = adjust_address (ops[0], V16QImode, offset);
736 src = adjust_address (ops[1], V16QImode, offset);
737 mask = gen_reg_rtx (V16QImode);
738 sreg = gen_reg_rtx (V16QImode);
739 dreg = gen_reg_rtx (V16QImode);
740 target = gen_reg_rtx (V16QImode);
741 emit_move_insn (mask, array_to_constant (V16QImode, arr));
742 emit_move_insn (dreg, dst);
743 emit_move_insn (sreg, src);
744 emit_insn (gen_selb (target, dreg, sreg, mask));
745 emit_move_insn (dst, target);
746 }
747 return 1;
748 }
749 return 0;
750}
751
752enum spu_comp_code
753{ SPU_EQ, SPU_GT, SPU_GTU };
754
5474166e 755int spu_comp_icode[12][3] = {
756 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
757 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
758 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
759 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
760 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
761 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
762 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
763 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
764 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
765 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
766 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
767 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 768};
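/* The rows above are indexed by operand mode in the order QI, HI, SI, DI,
   TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF (matching the mode switch in
   spu_emit_branch_or_set); the columns correspond to SPU_EQ, SPU_GT and
   SPU_GTU.  A 0 entry means no such compare insn exists, e.g. there is no
   unsigned compare for the floating point modes.  */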
769
770/* Generate a compare for CODE. Return a brand-new rtx that represents
771 the result of the compare. GCC can figure this out too if we don't
 772   provide all variations of compares, but since GCC always wants to use
 773   WORD_MODE, we can generate better code in most cases if we do it
774 ourselves. */
775void
776spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
777{
778 int reverse_compare = 0;
779 int reverse_test = 0;
5d70b918 780 rtx compare_result, eq_result;
781 rtx comp_rtx, eq_rtx;
644459d0 782 rtx target = operands[0];
783 enum machine_mode comp_mode;
784 enum machine_mode op_mode;
5d70b918 785 enum spu_comp_code scode, eq_code, ior_code;
644459d0 786 int index;
5d70b918 787 int eq_test = 0;
644459d0 788
789 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
790 and so on, to keep the constant in operand 1. */
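  /* For example, (x >= 5) becomes (x > 4) and (x < 5) becomes (x <= 4),
     as long as the decremented constant still fits in the mode.  */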
791 if (GET_CODE (spu_compare_op1) == CONST_INT)
792 {
793 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
794 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
795 switch (code)
796 {
797 case GE:
798 spu_compare_op1 = GEN_INT (val);
799 code = GT;
800 break;
801 case LT:
802 spu_compare_op1 = GEN_INT (val);
803 code = LE;
804 break;
805 case GEU:
806 spu_compare_op1 = GEN_INT (val);
807 code = GTU;
808 break;
809 case LTU:
810 spu_compare_op1 = GEN_INT (val);
811 code = LEU;
812 break;
813 default:
814 break;
815 }
816 }
817
5d70b918 818 comp_mode = SImode;
819 op_mode = GET_MODE (spu_compare_op0);
820
644459d0 821 switch (code)
822 {
823 case GE:
644459d0 824 scode = SPU_GT;
07027691 825 if (HONOR_NANS (op_mode))
5d70b918 826 {
827 reverse_compare = 0;
828 reverse_test = 0;
829 eq_test = 1;
830 eq_code = SPU_EQ;
831 }
832 else
833 {
834 reverse_compare = 1;
835 reverse_test = 1;
836 }
644459d0 837 break;
838 case LE:
644459d0 839 scode = SPU_GT;
07027691 840 if (HONOR_NANS (op_mode))
5d70b918 841 {
842 reverse_compare = 1;
843 reverse_test = 0;
844 eq_test = 1;
845 eq_code = SPU_EQ;
846 }
847 else
848 {
849 reverse_compare = 0;
850 reverse_test = 1;
851 }
644459d0 852 break;
853 case LT:
854 reverse_compare = 1;
855 reverse_test = 0;
856 scode = SPU_GT;
857 break;
858 case GEU:
859 reverse_compare = 1;
860 reverse_test = 1;
861 scode = SPU_GTU;
862 break;
863 case LEU:
864 reverse_compare = 0;
865 reverse_test = 1;
866 scode = SPU_GTU;
867 break;
868 case LTU:
869 reverse_compare = 1;
870 reverse_test = 0;
871 scode = SPU_GTU;
872 break;
873 case NE:
874 reverse_compare = 0;
875 reverse_test = 1;
876 scode = SPU_EQ;
877 break;
878
879 case EQ:
880 scode = SPU_EQ;
881 break;
882 case GT:
883 scode = SPU_GT;
884 break;
885 case GTU:
886 scode = SPU_GTU;
887 break;
888 default:
889 scode = SPU_EQ;
890 break;
891 }
892
644459d0 893 switch (op_mode)
894 {
895 case QImode:
896 index = 0;
897 comp_mode = QImode;
898 break;
899 case HImode:
900 index = 1;
901 comp_mode = HImode;
902 break;
903 case SImode:
904 index = 2;
905 break;
906 case DImode:
907 index = 3;
908 break;
909 case TImode:
910 index = 4;
911 break;
912 case SFmode:
913 index = 5;
914 break;
915 case DFmode:
916 index = 6;
917 break;
918 case V16QImode:
5474166e 919 index = 7;
920 comp_mode = op_mode;
921 break;
644459d0 922 case V8HImode:
5474166e 923 index = 8;
924 comp_mode = op_mode;
925 break;
644459d0 926 case V4SImode:
5474166e 927 index = 9;
928 comp_mode = op_mode;
929 break;
644459d0 930 case V4SFmode:
5474166e 931 index = 10;
932 comp_mode = V4SImode;
933 break;
644459d0 934 case V2DFmode:
5474166e 935 index = 11;
936 comp_mode = V2DImode;
644459d0 937 break;
5474166e 938 case V2DImode:
644459d0 939 default:
940 abort ();
941 }
942
07027691 943 if (GET_MODE (spu_compare_op1) == DFmode
944 && (scode != SPU_GT && scode != SPU_EQ))
945 abort ();
644459d0 946
947 if (is_set == 0 && spu_compare_op1 == const0_rtx
948 && (GET_MODE (spu_compare_op0) == SImode
949 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
950 {
951 /* Don't need to set a register with the result when we are
952 comparing against zero and branching. */
953 reverse_test = !reverse_test;
954 compare_result = spu_compare_op0;
955 }
956 else
957 {
958 compare_result = gen_reg_rtx (comp_mode);
959
960 if (reverse_compare)
961 {
962 rtx t = spu_compare_op1;
963 spu_compare_op1 = spu_compare_op0;
964 spu_compare_op0 = t;
965 }
966
967 if (spu_comp_icode[index][scode] == 0)
968 abort ();
969
970 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
971 (spu_compare_op0, op_mode))
972 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
973 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
974 (spu_compare_op1, op_mode))
975 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
976 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
977 spu_compare_op0,
978 spu_compare_op1);
979 if (comp_rtx == 0)
980 abort ();
981 emit_insn (comp_rtx);
982
5d70b918 983 if (eq_test)
984 {
985 eq_result = gen_reg_rtx (comp_mode);
986 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
987 spu_compare_op0,
988 spu_compare_op1);
989 if (eq_rtx == 0)
990 abort ();
991 emit_insn (eq_rtx);
992 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
993 gcc_assert (ior_code != CODE_FOR_nothing);
994 emit_insn (GEN_FCN (ior_code)
995 (compare_result, compare_result, eq_result));
996 }
644459d0 997 }
998
999 if (is_set == 0)
1000 {
1001 rtx bcomp;
1002 rtx loc_ref;
1003
1004 /* We don't have branch on QI compare insns, so we convert the
1005 QI compare result to a HI result. */
1006 if (comp_mode == QImode)
1007 {
1008 rtx old_res = compare_result;
1009 compare_result = gen_reg_rtx (HImode);
1010 comp_mode = HImode;
1011 emit_insn (gen_extendqihi2 (compare_result, old_res));
1012 }
1013
1014 if (reverse_test)
1015 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1016 else
1017 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1018
1019 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1020 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1021 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1022 loc_ref, pc_rtx)));
1023 }
1024 else if (is_set == 2)
1025 {
1026 int compare_size = GET_MODE_BITSIZE (comp_mode);
1027 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1028 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1029 rtx select_mask;
1030 rtx op_t = operands[2];
1031 rtx op_f = operands[3];
1032
1033 /* The result of the comparison can be SI, HI or QI mode. Create a
1034 mask based on that result. */
1035 if (target_size > compare_size)
1036 {
1037 select_mask = gen_reg_rtx (mode);
1038 emit_insn (gen_extend_compare (select_mask, compare_result));
1039 }
1040 else if (target_size < compare_size)
1041 select_mask =
1042 gen_rtx_SUBREG (mode, compare_result,
1043 (compare_size - target_size) / BITS_PER_UNIT);
1044 else if (comp_mode != mode)
1045 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1046 else
1047 select_mask = compare_result;
1048
1049 if (GET_MODE (target) != GET_MODE (op_t)
1050 || GET_MODE (target) != GET_MODE (op_f))
1051 abort ();
1052
1053 if (reverse_test)
1054 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1055 else
1056 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1057 }
1058 else
1059 {
1060 if (reverse_test)
1061 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1062 gen_rtx_NOT (comp_mode, compare_result)));
1063 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1064 emit_insn (gen_extendhisi2 (target, compare_result));
1065 else if (GET_MODE (target) == SImode
1066 && GET_MODE (compare_result) == QImode)
1067 emit_insn (gen_extend_compare (target, compare_result));
1068 else
1069 emit_move_insn (target, compare_result);
1070 }
1071}
1072
1073HOST_WIDE_INT
1074const_double_to_hwint (rtx x)
1075{
1076 HOST_WIDE_INT val;
1077 REAL_VALUE_TYPE rv;
1078 if (GET_MODE (x) == SFmode)
1079 {
1080 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1081 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1082 }
1083 else if (GET_MODE (x) == DFmode)
1084 {
1085 long l[2];
1086 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1087 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1088 val = l[0];
1089 val = (val << 32) | (l[1] & 0xffffffff);
1090 }
1091 else
1092 abort ();
1093 return val;
1094}
1095
1096rtx
1097hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1098{
1099 long tv[2];
1100 REAL_VALUE_TYPE rv;
1101 gcc_assert (mode == SFmode || mode == DFmode);
1102
1103 if (mode == SFmode)
1104 tv[0] = (v << 32) >> 32;
1105 else if (mode == DFmode)
1106 {
1107 tv[1] = (v << 32) >> 32;
1108 tv[0] = v >> 32;
1109 }
1110 real_from_target (&rv, tv, mode);
1111 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1112}
1113
1114void
1115print_operand_address (FILE * file, register rtx addr)
1116{
1117 rtx reg;
1118 rtx offset;
1119
e04cf423 1120 if (GET_CODE (addr) == AND
1121 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1122 && INTVAL (XEXP (addr, 1)) == -16)
1123 addr = XEXP (addr, 0);
1124
644459d0 1125 switch (GET_CODE (addr))
1126 {
1127 case REG:
1128 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1129 break;
1130
1131 case PLUS:
1132 reg = XEXP (addr, 0);
1133 offset = XEXP (addr, 1);
1134 if (GET_CODE (offset) == REG)
1135 {
1136 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1137 reg_names[REGNO (offset)]);
1138 }
1139 else if (GET_CODE (offset) == CONST_INT)
1140 {
1141 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1142 INTVAL (offset), reg_names[REGNO (reg)]);
1143 }
1144 else
1145 abort ();
1146 break;
1147
1148 case CONST:
1149 case LABEL_REF:
1150 case SYMBOL_REF:
1151 case CONST_INT:
1152 output_addr_const (file, addr);
1153 break;
1154
1155 default:
1156 debug_rtx (addr);
1157 abort ();
1158 }
1159}
1160
1161void
1162print_operand (FILE * file, rtx x, int code)
1163{
1164 enum machine_mode mode = GET_MODE (x);
1165 HOST_WIDE_INT val;
1166 unsigned char arr[16];
1167 int xcode = GET_CODE (x);
dea01258 1168 int i, info;
644459d0 1169 if (GET_MODE (x) == VOIDmode)
1170 switch (code)
1171 {
644459d0 1172 case 'L': /* 128 bits, signed */
1173 case 'm': /* 128 bits, signed */
1174 case 'T': /* 128 bits, signed */
1175 case 't': /* 128 bits, signed */
1176 mode = TImode;
1177 break;
644459d0 1178 case 'K': /* 64 bits, signed */
1179 case 'k': /* 64 bits, signed */
1180 case 'D': /* 64 bits, signed */
1181 case 'd': /* 64 bits, signed */
1182 mode = DImode;
1183 break;
644459d0 1184 case 'J': /* 32 bits, signed */
1185 case 'j': /* 32 bits, signed */
1186 case 's': /* 32 bits, signed */
1187 case 'S': /* 32 bits, signed */
1188 mode = SImode;
1189 break;
1190 }
1191 switch (code)
1192 {
1193
1194 case 'j': /* 32 bits, signed */
1195 case 'k': /* 64 bits, signed */
1196 case 'm': /* 128 bits, signed */
1197 if (xcode == CONST_INT
1198 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1199 {
1200 gcc_assert (logical_immediate_p (x, mode));
1201 constant_to_array (mode, x, arr);
1202 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1203 val = trunc_int_for_mode (val, SImode);
1204 switch (which_logical_immediate (val))
1205 {
1206 case SPU_ORI:
1207 break;
1208 case SPU_ORHI:
1209 fprintf (file, "h");
1210 break;
1211 case SPU_ORBI:
1212 fprintf (file, "b");
1213 break;
1214 default:
1215 gcc_unreachable();
1216 }
1217 }
1218 else
1219 gcc_unreachable();
1220 return;
1221
1222 case 'J': /* 32 bits, signed */
1223 case 'K': /* 64 bits, signed */
1224 case 'L': /* 128 bits, signed */
1225 if (xcode == CONST_INT
1226 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1227 {
1228 gcc_assert (logical_immediate_p (x, mode)
1229 || iohl_immediate_p (x, mode));
1230 constant_to_array (mode, x, arr);
1231 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1232 val = trunc_int_for_mode (val, SImode);
1233 switch (which_logical_immediate (val))
1234 {
1235 case SPU_ORI:
1236 case SPU_IOHL:
1237 break;
1238 case SPU_ORHI:
1239 val = trunc_int_for_mode (val, HImode);
1240 break;
1241 case SPU_ORBI:
1242 val = trunc_int_for_mode (val, QImode);
1243 break;
1244 default:
1245 gcc_unreachable();
1246 }
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1248 }
1249 else
1250 gcc_unreachable();
1251 return;
1252
1253 case 't': /* 128 bits, signed */
1254 case 'd': /* 64 bits, signed */
1255 case 's': /* 32 bits, signed */
dea01258 1256 if (CONSTANT_P (x))
644459d0 1257 {
dea01258 1258 enum immediate_class c = classify_immediate (x, mode);
1259 switch (c)
1260 {
1261 case IC_IL1:
1262 constant_to_array (mode, x, arr);
1263 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1264 val = trunc_int_for_mode (val, SImode);
1265 switch (which_immediate_load (val))
1266 {
1267 case SPU_IL:
1268 break;
1269 case SPU_ILA:
1270 fprintf (file, "a");
1271 break;
1272 case SPU_ILH:
1273 fprintf (file, "h");
1274 break;
1275 case SPU_ILHU:
1276 fprintf (file, "hu");
1277 break;
1278 default:
1279 gcc_unreachable ();
1280 }
1281 break;
1282 case IC_CPAT:
1283 constant_to_array (mode, x, arr);
1284 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1285 if (info == 1)
1286 fprintf (file, "b");
1287 else if (info == 2)
1288 fprintf (file, "h");
1289 else if (info == 4)
1290 fprintf (file, "w");
1291 else if (info == 8)
1292 fprintf (file, "d");
1293 break;
1294 case IC_IL1s:
1295 if (xcode == CONST_VECTOR)
1296 {
1297 x = CONST_VECTOR_ELT (x, 0);
1298 xcode = GET_CODE (x);
1299 }
1300 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1301 fprintf (file, "a");
1302 else if (xcode == HIGH)
1303 fprintf (file, "hu");
1304 break;
1305 case IC_FSMBI:
5df189be 1306 case IC_FSMBI2:
dea01258 1307 case IC_IL2:
1308 case IC_IL2s:
1309 case IC_POOL:
1310 abort ();
1311 }
644459d0 1312 }
644459d0 1313 else
1314 gcc_unreachable ();
1315 return;
1316
1317 case 'T': /* 128 bits, signed */
1318 case 'D': /* 64 bits, signed */
1319 case 'S': /* 32 bits, signed */
dea01258 1320 if (CONSTANT_P (x))
644459d0 1321 {
dea01258 1322 enum immediate_class c = classify_immediate (x, mode);
1323 switch (c)
644459d0 1324 {
dea01258 1325 case IC_IL1:
1326 constant_to_array (mode, x, arr);
1327 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1328 val = trunc_int_for_mode (val, SImode);
1329 switch (which_immediate_load (val))
1330 {
1331 case SPU_IL:
1332 case SPU_ILA:
1333 break;
1334 case SPU_ILH:
1335 case SPU_ILHU:
1336 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1337 break;
1338 default:
1339 gcc_unreachable ();
1340 }
1341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1342 break;
1343 case IC_FSMBI:
1344 constant_to_array (mode, x, arr);
1345 val = 0;
1346 for (i = 0; i < 16; i++)
1347 {
1348 val <<= 1;
1349 val |= arr[i] & 1;
1350 }
1351 print_operand (file, GEN_INT (val), 0);
1352 break;
1353 case IC_CPAT:
1354 constant_to_array (mode, x, arr);
1355 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1357 break;
dea01258 1358 case IC_IL1s:
dea01258 1359 if (xcode == HIGH)
5df189be 1360 x = XEXP (x, 0);
1361 if (GET_CODE (x) == CONST_VECTOR)
1362 x = CONST_VECTOR_ELT (x, 0);
1363 output_addr_const (file, x);
1364 if (xcode == HIGH)
1365 fprintf (file, "@h");
644459d0 1366 break;
dea01258 1367 case IC_IL2:
1368 case IC_IL2s:
5df189be 1369 case IC_FSMBI2:
dea01258 1370 case IC_POOL:
1371 abort ();
644459d0 1372 }
c8befdb9 1373 }
644459d0 1374 else
1375 gcc_unreachable ();
1376 return;
1377
644459d0 1378 case 'C':
1379 if (xcode == CONST_INT)
1380 {
 1381	  /* Only the 4 least significant bits are relevant for generating
 1382	     control word instructions.  */
1383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1384 return;
1385 }
1386 break;
1387
1388 case 'M': /* print code for c*d */
1389 if (GET_CODE (x) == CONST_INT)
1390 switch (INTVAL (x))
1391 {
1392 case 1:
1393 fprintf (file, "b");
1394 break;
1395 case 2:
1396 fprintf (file, "h");
1397 break;
1398 case 4:
1399 fprintf (file, "w");
1400 break;
1401 case 8:
1402 fprintf (file, "d");
1403 break;
1404 default:
1405 gcc_unreachable();
1406 }
1407 else
1408 gcc_unreachable();
1409 return;
1410
1411 case 'N': /* Negate the operand */
1412 if (xcode == CONST_INT)
1413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1414 else if (xcode == CONST_VECTOR)
1415 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1416 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1417 return;
1418
1419 case 'I': /* enable/disable interrupts */
1420 if (xcode == CONST_INT)
1421 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1422 return;
1423
1424 case 'b': /* branch modifiers */
1425 if (xcode == REG)
1426 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1427 else if (COMPARISON_P (x))
1428 fprintf (file, "%s", xcode == NE ? "n" : "");
1429 return;
1430
1431 case 'i': /* indirect call */
1432 if (xcode == MEM)
1433 {
1434 if (GET_CODE (XEXP (x, 0)) == REG)
1435 /* Used in indirect function calls. */
1436 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1437 else
1438 output_address (XEXP (x, 0));
1439 }
1440 return;
1441
1442 case 'p': /* load/store */
1443 if (xcode == MEM)
1444 {
1445 x = XEXP (x, 0);
1446 xcode = GET_CODE (x);
1447 }
e04cf423 1448 if (xcode == AND)
1449 {
1450 x = XEXP (x, 0);
1451 xcode = GET_CODE (x);
1452 }
644459d0 1453 if (xcode == REG)
1454 fprintf (file, "d");
1455 else if (xcode == CONST_INT)
1456 fprintf (file, "a");
1457 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1458 fprintf (file, "r");
1459 else if (xcode == PLUS || xcode == LO_SUM)
1460 {
1461 if (GET_CODE (XEXP (x, 1)) == REG)
1462 fprintf (file, "x");
1463 else
1464 fprintf (file, "d");
1465 }
1466 return;
1467
5df189be 1468 case 'e':
1469 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1470 val &= 0x7;
1471 output_addr_const (file, GEN_INT (val));
1472 return;
1473
1474 case 'f':
1475 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1476 val &= 0x1f;
1477 output_addr_const (file, GEN_INT (val));
1478 return;
1479
1480 case 'g':
1481 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1482 val &= 0x3f;
1483 output_addr_const (file, GEN_INT (val));
1484 return;
1485
1486 case 'h':
1487 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1488 val = (val >> 3) & 0x1f;
1489 output_addr_const (file, GEN_INT (val));
1490 return;
1491
1492 case 'E':
1493 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1494 val = -val;
1495 val &= 0x7;
1496 output_addr_const (file, GEN_INT (val));
1497 return;
1498
1499 case 'F':
1500 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1501 val = -val;
1502 val &= 0x1f;
1503 output_addr_const (file, GEN_INT (val));
1504 return;
1505
1506 case 'G':
1507 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1508 val = -val;
1509 val &= 0x3f;
1510 output_addr_const (file, GEN_INT (val));
1511 return;
1512
1513 case 'H':
1514 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1515 val = -(val & -8ll);
1516 val = (val >> 3) & 0x1f;
1517 output_addr_const (file, GEN_INT (val));
1518 return;
1519
644459d0 1520 case 0:
1521 if (xcode == REG)
1522 fprintf (file, "%s", reg_names[REGNO (x)]);
1523 else if (xcode == MEM)
1524 output_address (XEXP (x, 0));
1525 else if (xcode == CONST_VECTOR)
dea01258 1526 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1527 else
1528 output_addr_const (file, x);
1529 return;
1530
f6a0d06f 1531 /* unused letters
5df189be 1532 o qr uvw yz
1533 AB OPQR UVWXYZ */
644459d0 1534 default:
1535 output_operand_lossage ("invalid %%xn code");
1536 }
1537 gcc_unreachable ();
1538}
1539
1540extern char call_used_regs[];
644459d0 1541
1542/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1543 caller saved register. For leaf functions it is more efficient to
1544 use a volatile register because we won't need to save and restore the
1545 pic register. This routine is only valid after register allocation
1546 is completed, so we can pick an unused register. */
1547static rtx
1548get_pic_reg (void)
1549{
1550 rtx pic_reg = pic_offset_table_rtx;
1551 if (!reload_completed && !reload_in_progress)
1552 abort ();
1553 return pic_reg;
1554}
1555
5df189be 1556/* Split constant addresses to handle cases that are too large.
1557 Add in the pic register when in PIC mode.
1558 Split immediates that require more than 1 instruction. */
dea01258 1559int
1560spu_split_immediate (rtx * ops)
c8befdb9 1561{
dea01258 1562 enum machine_mode mode = GET_MODE (ops[0]);
1563 enum immediate_class c = classify_immediate (ops[1], mode);
1564
1565 switch (c)
c8befdb9 1566 {
dea01258 1567 case IC_IL2:
1568 {
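      /* E.g. loading the SImode constant 0x12345678 this way is expected
         to become an ilhu of 0x1234 followed by an iohl of 0x5678 (the
         high and low halfwords split into arrhi/arrlo below).  */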
1569 unsigned char arrhi[16];
1570 unsigned char arrlo[16];
98bbec1e 1571 rtx to, temp, hi, lo;
dea01258 1572 int i;
98bbec1e 1573 enum machine_mode imode = mode;
1574 /* We need to do reals as ints because the constant used in the
1575 IOR might not be a legitimate real constant. */
1576 imode = int_mode_for_mode (mode);
dea01258 1577 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1578 if (imode != mode)
1579 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1580 else
1581 to = ops[0];
1582 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1583 for (i = 0; i < 16; i += 4)
1584 {
1585 arrlo[i + 2] = arrhi[i + 2];
1586 arrlo[i + 3] = arrhi[i + 3];
1587 arrlo[i + 0] = arrlo[i + 1] = 0;
1588 arrhi[i + 2] = arrhi[i + 3] = 0;
1589 }
98bbec1e 1590 hi = array_to_constant (imode, arrhi);
1591 lo = array_to_constant (imode, arrlo);
1592 emit_move_insn (temp, hi);
dea01258 1593 emit_insn (gen_rtx_SET
98bbec1e 1594 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1595 return 1;
1596 }
5df189be 1597 case IC_FSMBI2:
1598 {
1599 unsigned char arr_fsmbi[16];
1600 unsigned char arr_andbi[16];
1601 rtx to, reg_fsmbi, reg_and;
1602 int i;
1603 enum machine_mode imode = mode;
1604 /* We need to do reals as ints because the constant used in the
1605 * AND might not be a legitimate real constant. */
1606 imode = int_mode_for_mode (mode);
1607 constant_to_array (mode, ops[1], arr_fsmbi);
1608 if (imode != mode)
1609 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1610 else
1611 to = ops[0];
1612 for (i = 0; i < 16; i++)
1613 if (arr_fsmbi[i] != 0)
1614 {
1615 arr_andbi[0] = arr_fsmbi[i];
1616 arr_fsmbi[i] = 0xff;
1617 }
1618 for (i = 1; i < 16; i++)
1619 arr_andbi[i] = arr_andbi[0];
1620 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1621 reg_and = array_to_constant (imode, arr_andbi);
1622 emit_move_insn (to, reg_fsmbi);
1623 emit_insn (gen_rtx_SET
1624 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1625 return 1;
1626 }
dea01258 1627 case IC_POOL:
1628 if (reload_in_progress || reload_completed)
1629 {
1630 rtx mem = force_const_mem (mode, ops[1]);
1631 if (TARGET_LARGE_MEM)
1632 {
1633 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1634 emit_move_insn (addr, XEXP (mem, 0));
1635 mem = replace_equiv_address (mem, addr);
1636 }
1637 emit_move_insn (ops[0], mem);
1638 return 1;
1639 }
1640 break;
1641 case IC_IL1s:
1642 case IC_IL2s:
1643 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1644 {
1645 if (c == IC_IL2s)
1646 {
5df189be 1647 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1648 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1649 }
1650 else if (flag_pic)
1651 emit_insn (gen_pic (ops[0], ops[1]));
1652 if (flag_pic)
1653 {
1654 rtx pic_reg = get_pic_reg ();
1655 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1656 crtl->uses_pic_offset_table = 1;
dea01258 1657 }
1658 return flag_pic || c == IC_IL2s;
1659 }
1660 break;
1661 case IC_IL1:
1662 case IC_FSMBI:
1663 case IC_CPAT:
1664 break;
c8befdb9 1665 }
dea01258 1666 return 0;
c8befdb9 1667}
1668
644459d0 1669/* SAVING is TRUE when we are generating the actual load and store
1670 instructions for REGNO. When determining the size of the stack
 1671   needed for saving registers we must allocate enough space for the
1672 worst case, because we don't always have the information early enough
1673 to not allocate it. But we can at least eliminate the actual loads
1674 and stores during the prologue/epilogue. */
1675static int
1676need_to_save_reg (int regno, int saving)
1677{
3072d30e 1678 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1679 return 1;
1680 if (flag_pic
1681 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1682 && (!saving || crtl->uses_pic_offset_table)
644459d0 1683 && (!saving
3072d30e 1684 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1685 return 1;
1686 return 0;
1687}
1688
1689/* This function is only correct starting with local register
1690 allocation */
1691int
1692spu_saved_regs_size (void)
1693{
1694 int reg_save_size = 0;
1695 int regno;
1696
1697 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1698 if (need_to_save_reg (regno, 0))
1699 reg_save_size += 0x10;
1700 return reg_save_size;
1701}
1702
1703static rtx
1704frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1705{
1706 rtx reg = gen_rtx_REG (V4SImode, regno);
1707 rtx mem =
1708 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1709 return emit_insn (gen_movv4si (mem, reg));
1710}
1711
1712static rtx
1713frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1714{
1715 rtx reg = gen_rtx_REG (V4SImode, regno);
1716 rtx mem =
1717 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1718 return emit_insn (gen_movv4si (reg, mem));
1719}
1720
1721/* This happens after reload, so we need to expand it. */
1722static rtx
1723frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1724{
1725 rtx insn;
1726 if (satisfies_constraint_K (GEN_INT (imm)))
1727 {
1728 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1729 }
1730 else
1731 {
3072d30e 1732 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1733 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1734 if (REGNO (src) == REGNO (scratch))
1735 abort ();
1736 }
644459d0 1737 return insn;
1738}
1739
1740/* Return nonzero if this function is known to have a null epilogue. */
1741
1742int
1743direct_return (void)
1744{
1745 if (reload_completed)
1746 {
1747 if (cfun->static_chain_decl == 0
1748 && (spu_saved_regs_size ()
1749 + get_frame_size ()
abe32cce 1750 + crtl->outgoing_args_size
1751 + crtl->args.pretend_args_size == 0)
644459d0 1752 && current_function_is_leaf)
1753 return 1;
1754 }
1755 return 0;
1756}
1757
1758/*
1759 The stack frame looks like this:
1760 +-------------+
1761 | incoming |
a8e019fa 1762 | args |
1763 AP -> +-------------+
644459d0 1764 | $lr save |
1765 +-------------+
1766 prev SP | back chain |
1767 +-------------+
1768 | var args |
abe32cce 1769 | reg save | crtl->args.pretend_args_size bytes
644459d0 1770 +-------------+
1771 | ... |
1772 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1773 FP -> +-------------+
644459d0 1774 | ... |
a8e019fa 1775 | vars | get_frame_size() bytes
1776 HFP -> +-------------+
644459d0 1777 | ... |
1778 | outgoing |
abe32cce 1779 | args | crtl->outgoing_args_size bytes
644459d0 1780 +-------------+
1781 | $lr of next |
1782 | frame |
1783 +-------------+
a8e019fa 1784 | back chain |
1785 SP -> +-------------+
644459d0 1786
1787*/
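/* A sketch of the arithmetic used below: total_size = get_frame_size ()
   + spu_saved_regs_size () + crtl->outgoing_args_size
   + crtl->args.pretend_args_size, plus STACK_POINTER_OFFSET whenever the
   function is not a leaf, calls alloca, or already has a nonzero size;
   a leaf function needing nothing on the stack leaves $sp untouched.  */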
1788void
1789spu_expand_prologue (void)
1790{
1791 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1792 HOST_WIDE_INT total_size;
1793 HOST_WIDE_INT saved_regs_size;
1794 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1795 rtx scratch_reg_0, scratch_reg_1;
1796 rtx insn, real;
1797
1798 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1799 the "toplevel" insn chain. */
1800 emit_note (NOTE_INSN_DELETED);
1801
1802 if (flag_pic && optimize == 0)
18d50ae6 1803 crtl->uses_pic_offset_table = 1;
644459d0 1804
1805 if (spu_naked_function_p (current_function_decl))
1806 return;
1807
1808 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1809 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1810
1811 saved_regs_size = spu_saved_regs_size ();
1812 total_size = size + saved_regs_size
abe32cce 1813 + crtl->outgoing_args_size
1814 + crtl->args.pretend_args_size;
644459d0 1815
1816 if (!current_function_is_leaf
18d50ae6 1817 || cfun->calls_alloca || total_size > 0)
644459d0 1818 total_size += STACK_POINTER_OFFSET;
1819
1820 /* Save this first because code after this might use the link
1821 register as a scratch register. */
1822 if (!current_function_is_leaf)
1823 {
1824 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1825 RTX_FRAME_RELATED_P (insn) = 1;
1826 }
1827
1828 if (total_size > 0)
1829 {
abe32cce 1830 offset = -crtl->args.pretend_args_size;
644459d0 1831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1832 if (need_to_save_reg (regno, 1))
1833 {
1834 offset -= 16;
1835 insn = frame_emit_store (regno, sp_reg, offset);
1836 RTX_FRAME_RELATED_P (insn) = 1;
1837 }
1838 }
1839
18d50ae6 1840 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1841 {
1842 rtx pic_reg = get_pic_reg ();
1843 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1844 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1845 }
1846
1847 if (total_size > 0)
1848 {
1849 if (flag_stack_check)
1850 {
d819917f 1851 /* We compare against total_size-1 because
644459d0 1852 ($sp >= total_size) <=> ($sp > total_size-1) */
1853 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1854 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1855 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1856 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1857 {
1858 emit_move_insn (scratch_v4si, size_v4si);
1859 size_v4si = scratch_v4si;
1860 }
1861 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1862 emit_insn (gen_vec_extractv4si
1863 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1864 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1865 }
1866
1867 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1868 the value of the previous $sp because we save it as the back
1869 chain. */
1870 if (total_size <= 2000)
1871 {
1872 /* In this case we save the back chain first. */
1873 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1874 insn =
1875 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1876 }
1877 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1878 {
1879 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1880 insn =
1881 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1882 }
1883 else
1884 {
1885 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1886 insn =
1887 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1888 }
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1891 REG_NOTES (insn) =
1892 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1893
1894 if (total_size > 2000)
1895 {
1896 /* Save the back chain ptr */
1897 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1898 }
1899
1900 if (frame_pointer_needed)
1901 {
1902 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1903 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1904 + crtl->outgoing_args_size;
644459d0 1905 /* Set the new frame_pointer */
d8dfeb55 1906 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1907 RTX_FRAME_RELATED_P (insn) = 1;
1908 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1909 REG_NOTES (insn) =
1910 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1911 real, REG_NOTES (insn));
5df189be 1912 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1913 }
1914 }
1915
1916 emit_note (NOTE_INSN_DELETED);
1917}
1918
1919void
1920spu_expand_epilogue (bool sibcall_p)
1921{
1922 int size = get_frame_size (), offset, regno;
1923 HOST_WIDE_INT saved_regs_size, total_size;
1924 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1925 rtx jump, scratch_reg_0;
1926
1927 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1928 the "toplevel" insn chain. */
1929 emit_note (NOTE_INSN_DELETED);
1930
1931 if (spu_naked_function_p (current_function_decl))
1932 return;
1933
1934 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1935
1936 saved_regs_size = spu_saved_regs_size ();
1937 total_size = size + saved_regs_size
abe32cce 1938 + crtl->outgoing_args_size
1939 + crtl->args.pretend_args_size;
644459d0 1940
1941 if (!current_function_is_leaf
18d50ae6 1942 || cfun->calls_alloca || total_size > 0)
644459d0 1943 total_size += STACK_POINTER_OFFSET;
1944
1945 if (total_size > 0)
1946 {
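      /* When alloca was used the frame size is not a compile-time constant,
	 so restore $sp from the back chain saved at offset 0; otherwise undo
	 the known adjustment directly. */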
18d50ae6 1947 if (cfun->calls_alloca)
644459d0 1948 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1949 else
1950 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1951
1952
1953 if (saved_regs_size > 0)
1954 {
abe32cce 1955 offset = -crtl->args.pretend_args_size;
644459d0 1956 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1957 if (need_to_save_reg (regno, 1))
1958 {
1959 offset -= 0x10;
1960 frame_emit_load (regno, sp_reg, offset);
1961 }
1962 }
1963 }
1964
1965 if (!current_function_is_leaf)
1966 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1967
1968 if (!sibcall_p)
1969 {
18b42941 1970 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1971 jump = emit_jump_insn (gen__return ());
1972 emit_barrier_after (jump);
1973 }
1974
1975 emit_note (NOTE_INSN_DELETED);
1976}
1977
1978rtx
1979spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1980{
1981 if (count != 0)
1982 return 0;
1983 /* This is inefficient because it ends up copying to a save-register
1984 which then gets saved even though $lr has already been saved. But
1985 it does generate better code for leaf functions and we don't need
1986 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1987 used for __builtin_return_address anyway, so maybe we don't care if
1988 it's inefficient. */
1989 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1990}
1991\f
1992
1993/* Given VAL, generate a constant appropriate for MODE.
1994 If MODE is a vector mode, every element will be VAL.
1995 For TImode, VAL will be zero extended to 128 bits. */
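/* For example (illustration only), spu_const (V4SImode, 1) builds
   (const_vector:V4SI [1 1 1 1]), each element coming from
   immed_double_const as the code below shows. */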
1996rtx
1997spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1998{
1999 rtx inner;
2000 rtvec v;
2001 int units, i;
2002
2003 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2004 || GET_MODE_CLASS (mode) == MODE_FLOAT
2005 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2006 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2007
2008 if (GET_MODE_CLASS (mode) == MODE_INT)
2009 return immed_double_const (val, 0, mode);
2010
2011 /* val is the bit representation of the float */
2012 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2013 return hwint_to_const_double (mode, val);
2014
2015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2016 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2017 else
2018 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2019
2020 units = GET_MODE_NUNITS (mode);
2021
2022 v = rtvec_alloc (units);
2023
2024 for (i = 0; i < units; ++i)
2025 RTVEC_ELT (v, i) = inner;
2026
2027 return gen_rtx_CONST_VECTOR (mode, v);
2028}
644459d0 2029
5474166e 2030/* Create a MODE vector constant from 4 ints. */
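/* For example, spu_const_from_ints (V4SImode, a, b, c, d) lays A..D out in
   big-endian byte order: arr[0] below is the most significant byte of A and
   arr[15] the least significant byte of D. */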
2031rtx
2032spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2033{
2034 unsigned char arr[16];
2035 arr[0] = (a >> 24) & 0xff;
2036 arr[1] = (a >> 16) & 0xff;
2037 arr[2] = (a >> 8) & 0xff;
2038 arr[3] = (a >> 0) & 0xff;
2039 arr[4] = (b >> 24) & 0xff;
2040 arr[5] = (b >> 16) & 0xff;
2041 arr[6] = (b >> 8) & 0xff;
2042 arr[7] = (b >> 0) & 0xff;
2043 arr[8] = (c >> 24) & 0xff;
2044 arr[9] = (c >> 16) & 0xff;
2045 arr[10] = (c >> 8) & 0xff;
2046 arr[11] = (c >> 0) & 0xff;
2047 arr[12] = (d >> 24) & 0xff;
2048 arr[13] = (d >> 16) & 0xff;
2049 arr[14] = (d >> 8) & 0xff;
2050 arr[15] = (d >> 0) & 0xff;
2051 return array_to_constant(mode, arr);
2052}
5a976006 2053\f
2054/* Branch hint support. */
5474166e 2055
644459d0 2056/* An array of these is used to propagate hints to predecessor blocks. */
2057struct spu_bb_info
2058{
5a976006 2059 rtx prop_jump; /* propagated from another block */
2060 int bb_index; /* the original block. */
644459d0 2061};
5a976006 2062static struct spu_bb_info *spu_bb_info;
644459d0 2063
5a976006 2064#define STOP_HINT_P(INSN) \
2065 (GET_CODE(INSN) == CALL_INSN \
2066 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2067 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2068
2069/* 1 when RTX is a hinted branch or its target. We keep track of
2070 what has been hinted so the safe-hint code can test it easily. */
2071#define HINTED_P(RTX) \
2072 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2073
2074/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2075#define SCHED_ON_EVEN_P(RTX) \
2076 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2077
2078/* Emit a nop for INSN such that the two will dual issue. This assumes
2079 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2080 We check for TImode to handle a MULTI1 insn which has dual issued its
2081 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2082 ADDR_VEC insns. */
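/* Background, assumed SPU pipeline behavior noted for clarity: nop is the
   even-pipe (pipe 0) no-op and lnop the odd-pipe (pipe 1) no-op, and dual
   issue pairs an even-address pipe-0 insn with an odd-address pipe-1 insn.
   That is why a pipe-1 insn starting an aligned pair gets a nopn placed
   before it, while other insns get an lnop placed after. */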
2083static void
2084emit_nop_for_insn (rtx insn)
644459d0 2085{
5a976006 2086 int p;
2087 rtx new_insn;
2088 p = get_pipe (insn);
2089 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 new_insn = emit_insn_after (gen_lnop (), insn);
2091 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2092 {
5a976006 2093 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2094 PUT_MODE (new_insn, TImode);
2095 PUT_MODE (insn, VOIDmode);
2096 }
2097 else
2098 new_insn = emit_insn_after (gen_lnop (), insn);
2099 recog_memoized (new_insn);
2100}
2101
2102/* Insert nops in basic blocks to meet dual issue alignment
2103 requirements. Also make sure hbrp and hint instructions are at least
2104 one cycle apart, possibly inserting a nop. */
2105static void
2106pad_bb(void)
2107{
2108 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2109 int length;
2110 int addr;
2111
2112 /* This sets up INSN_ADDRESSES. */
2113 shorten_branches (get_insns ());
2114
2115 /* Keep track of length added by nops. */
2116 length = 0;
2117
2118 prev_insn = 0;
2119 insn = get_insns ();
2120 if (!active_insn_p (insn))
2121 insn = next_active_insn (insn);
2122 for (; insn; insn = next_insn)
2123 {
2124 next_insn = next_active_insn (insn);
2125 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2126 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2127 {
5a976006 2128 if (hbr_insn)
2129 {
2130 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2131 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2132 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2133 || (a1 - a0 == 4))
2134 {
2135 prev_insn = emit_insn_before (gen_lnop (), insn);
2136 PUT_MODE (prev_insn, GET_MODE (insn));
2137 PUT_MODE (insn, TImode);
2138 length += 4;
2139 }
2140 }
2141 hbr_insn = insn;
2142 }
2143 if (INSN_CODE (insn) == CODE_FOR_blockage)
2144 {
2145 if (GET_MODE (insn) == TImode)
2146 PUT_MODE (next_insn, TImode);
2147 insn = next_insn;
2148 next_insn = next_active_insn (insn);
2149 }
2150 addr = INSN_ADDRESSES (INSN_UID (insn));
2151 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2152 {
2153 if (((addr + length) & 7) != 0)
2154 {
2155 emit_nop_for_insn (prev_insn);
2156 length += 4;
2157 }
644459d0 2158 }
5a976006 2159 else if (GET_MODE (insn) == TImode
2160 && ((next_insn && GET_MODE (next_insn) != TImode)
2161 || get_attr_type (insn) == TYPE_MULTI0)
2162 && ((addr + length) & 7) != 0)
2163 {
2164 /* prev_insn will always be set because the first insn is
2165 always 8-byte aligned. */
2166 emit_nop_for_insn (prev_insn);
2167 length += 4;
2168 }
2169 prev_insn = insn;
644459d0 2170 }
644459d0 2171}
2172
5a976006 2173\f
2174/* Routines for branch hints. */
2175
644459d0 2176static void
5a976006 2177spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2178 int distance, sbitmap blocks)
644459d0 2179{
5a976006 2180 rtx branch_label = 0;
2181 rtx hint;
2182 rtx insn;
2183 rtx table;
644459d0 2184
2185 if (before == 0 || branch == 0 || target == 0)
2186 return;
2187
5a976006 2188 /* While scheduling we require hints to be no further than 600 bytes
 2189 from the branch, so we need to enforce that here too. */
644459d0 2190 if (distance > 600)
2191 return;
2192
5a976006 2193 /* If BEFORE is a basic block note, emit the hint after the note. */
2194 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2195 before = NEXT_INSN (before);
644459d0 2196
2197 branch_label = gen_label_rtx ();
2198 LABEL_NUSES (branch_label)++;
2199 LABEL_PRESERVE_P (branch_label) = 1;
2200 insn = emit_label_before (branch_label, branch);
2201 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2202 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2203
2204 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2205 recog_memoized (hint);
2206 HINTED_P (branch) = 1;
644459d0 2207
5a976006 2208 if (GET_CODE (target) == LABEL_REF)
2209 HINTED_P (XEXP (target, 0)) = 1;
2210 else if (tablejump_p (branch, 0, &table))
644459d0 2211 {
5a976006 2212 rtvec vec;
2213 int j;
2214 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2215 vec = XVEC (PATTERN (table), 0);
2216 else
2217 vec = XVEC (PATTERN (table), 1);
2218 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2219 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2220 }
5a976006 2221
2222 if (distance >= 588)
644459d0 2223 {
5a976006 2224 /* Make sure the hint isn't scheduled any earlier than this point,
 2225 which could make it too far for the branch offset to fit. */
2226 recog_memoized (emit_insn_before (gen_blockage (), hint));
2227 }
2228 else if (distance <= 8 * 4)
2229 {
2230 /* To guarantee at least 8 insns between the hint and branch we
2231 insert nops. */
2232 int d;
2233 for (d = distance; d < 8 * 4; d += 4)
2234 {
2235 insn =
2236 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2237 recog_memoized (insn);
2238 }
2239
2240 /* Make sure any nops inserted aren't scheduled before the hint. */
2241 recog_memoized (emit_insn_after (gen_blockage (), hint));
2242
2243 /* Make sure any nops inserted aren't scheduled after the call. */
2244 if (CALL_P (branch) && distance < 8 * 4)
2245 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2246 }
644459d0 2247}
2248
2249/* Returns 0 if we don't want a hint for this branch. Otherwise return
2250 the rtx for the branch target. */
2251static rtx
2252get_branch_target (rtx branch)
2253{
2254 if (GET_CODE (branch) == JUMP_INSN)
2255 {
2256 rtx set, src;
2257
2258 /* Return statements */
2259 if (GET_CODE (PATTERN (branch)) == RETURN)
2260 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2261
2262 /* jump table */
2263 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2264 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2265 return 0;
2266
2267 set = single_set (branch);
2268 src = SET_SRC (set);
2269 if (GET_CODE (SET_DEST (set)) != PC)
2270 abort ();
2271
2272 if (GET_CODE (src) == IF_THEN_ELSE)
2273 {
2274 rtx lab = 0;
2275 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2276 if (note)
2277 {
2278 /* If the more probable case is not a fall through, then
2279 try a branch hint. */
2280 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2281 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2282 && GET_CODE (XEXP (src, 1)) != PC)
2283 lab = XEXP (src, 1);
2284 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2285 && GET_CODE (XEXP (src, 2)) != PC)
2286 lab = XEXP (src, 2);
2287 }
2288 if (lab)
2289 {
2290 if (GET_CODE (lab) == RETURN)
2291 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2292 return lab;
2293 }
2294 return 0;
2295 }
2296
2297 return src;
2298 }
2299 else if (GET_CODE (branch) == CALL_INSN)
2300 {
2301 rtx call;
2302 /* All of our call patterns are in a PARALLEL and the CALL is
2303 the first pattern in the PARALLEL. */
2304 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2305 abort ();
2306 call = XVECEXP (PATTERN (branch), 0, 0);
2307 if (GET_CODE (call) == SET)
2308 call = SET_SRC (call);
2309 if (GET_CODE (call) != CALL)
2310 abort ();
2311 return XEXP (XEXP (call, 0), 0);
2312 }
2313 return 0;
2314}
2315
5a976006 2316/* The special $hbr register is used to prevent the insn scheduler from
2317 moving hbr insns across instructions which invalidate them. It
2318 should only be used in a clobber, and this function searches for
2319 insns which clobber it. */
2320static bool
2321insn_clobbers_hbr (rtx insn)
2322{
2323 if (INSN_P (insn)
2324 && GET_CODE (PATTERN (insn)) == PARALLEL)
2325 {
2326 rtx parallel = PATTERN (insn);
2327 rtx clobber;
2328 int j;
2329 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2330 {
2331 clobber = XVECEXP (parallel, 0, j);
2332 if (GET_CODE (clobber) == CLOBBER
2333 && GET_CODE (XEXP (clobber, 0)) == REG
2334 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2335 return 1;
2336 }
2337 }
2338 return 0;
2339}
2340
2341/* Search up to 32 insns starting at FIRST:
2342 - at any kind of hinted branch, just return
2343 - at any unconditional branch in the first 15 insns, just return
2344 - at a call or indirect branch, after the first 15 insns, force it to
2345 an even address and return
2346 - at any unconditional branch, after the first 15 insns, force it to
2347 an even address.
 2348 At the end of the search, insert an hbrp within 4 insns of FIRST,
2349 and an hbrp within 16 instructions of FIRST.
2350 */
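/* A note on the arithmetic below: SPU instructions are 4 bytes, so the
   address comparisons use N * 4 to mean "N instructions"; e.g. 32 * 4 bounds
   the scan at 32 insns, and 14 * 4 rather than 16 * 4 leaves room for the up
   to 2 insns the first hbrp can add (see the comment inside the loop). */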
644459d0 2351static void
5a976006 2352insert_hbrp_for_ilb_runout (rtx first)
644459d0 2353{
5a976006 2354 rtx insn, before_4 = 0, before_16 = 0;
2355 int addr = 0, length, first_addr = -1;
2356 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2357 int insert_lnop_after = 0;
2358 for (insn = first; insn; insn = NEXT_INSN (insn))
2359 if (INSN_P (insn))
2360 {
2361 if (first_addr == -1)
2362 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2363 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2364 length = get_attr_length (insn);
2365
2366 if (before_4 == 0 && addr + length >= 4 * 4)
2367 before_4 = insn;
2368 /* We test for 14 instructions because the first hbrp will add
2369 up to 2 instructions. */
2370 if (before_16 == 0 && addr + length >= 14 * 4)
2371 before_16 = insn;
2372
2373 if (INSN_CODE (insn) == CODE_FOR_hbr)
2374 {
2375 /* Make sure an hbrp is at least 2 cycles away from a hint.
2376 Insert an lnop after the hbrp when necessary. */
2377 if (before_4 == 0 && addr > 0)
2378 {
2379 before_4 = insn;
2380 insert_lnop_after |= 1;
2381 }
2382 else if (before_4 && addr <= 4 * 4)
2383 insert_lnop_after |= 1;
2384 if (before_16 == 0 && addr > 10 * 4)
2385 {
2386 before_16 = insn;
2387 insert_lnop_after |= 2;
2388 }
2389 else if (before_16 && addr <= 14 * 4)
2390 insert_lnop_after |= 2;
2391 }
644459d0 2392
5a976006 2393 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2394 {
2395 if (addr < hbrp_addr0)
2396 hbrp_addr0 = addr;
2397 else if (addr < hbrp_addr1)
2398 hbrp_addr1 = addr;
2399 }
644459d0 2400
5a976006 2401 if (CALL_P (insn) || JUMP_P (insn))
2402 {
2403 if (HINTED_P (insn))
2404 return;
2405
2406 /* Any branch after the first 15 insns should be on an even
2407 address to avoid a special case branch. There might be
2408 some nops and/or hbrps inserted, so we test after 10
2409 insns. */
2410 if (addr > 10 * 4)
2411 SCHED_ON_EVEN_P (insn) = 1;
2412 }
644459d0 2413
5a976006 2414 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2415 return;
2416
2417
2418 if (addr + length >= 32 * 4)
644459d0 2419 {
5a976006 2420 gcc_assert (before_4 && before_16);
2421 if (hbrp_addr0 > 4 * 4)
644459d0 2422 {
5a976006 2423 insn =
2424 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2425 recog_memoized (insn);
2426 INSN_ADDRESSES_NEW (insn,
2427 INSN_ADDRESSES (INSN_UID (before_4)));
2428 PUT_MODE (insn, GET_MODE (before_4));
2429 PUT_MODE (before_4, TImode);
2430 if (insert_lnop_after & 1)
644459d0 2431 {
5a976006 2432 insn = emit_insn_before (gen_lnop (), before_4);
2433 recog_memoized (insn);
2434 INSN_ADDRESSES_NEW (insn,
2435 INSN_ADDRESSES (INSN_UID (before_4)));
2436 PUT_MODE (insn, TImode);
644459d0 2437 }
644459d0 2438 }
5a976006 2439 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2440 && hbrp_addr1 > 16 * 4)
644459d0 2441 {
5a976006 2442 insn =
2443 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2444 recog_memoized (insn);
2445 INSN_ADDRESSES_NEW (insn,
2446 INSN_ADDRESSES (INSN_UID (before_16)));
2447 PUT_MODE (insn, GET_MODE (before_16));
2448 PUT_MODE (before_16, TImode);
2449 if (insert_lnop_after & 2)
644459d0 2450 {
5a976006 2451 insn = emit_insn_before (gen_lnop (), before_16);
2452 recog_memoized (insn);
2453 INSN_ADDRESSES_NEW (insn,
2454 INSN_ADDRESSES (INSN_UID
2455 (before_16)));
2456 PUT_MODE (insn, TImode);
644459d0 2457 }
2458 }
5a976006 2459 return;
644459d0 2460 }
644459d0 2461 }
5a976006 2462 else if (BARRIER_P (insn))
2463 return;
644459d0 2464
644459d0 2465}
5a976006 2466
2467/* The SPU might hang when it executes 48 inline instructions after a
2468 hinted branch jumps to its hinted target. The beginning of a
2469 function and the return from a call might have been hinted, and must
2470 be handled as well. To prevent a hang we insert 2 hbrps. The first
2471 should be within 6 insns of the branch target. The second should be
2472 within 22 insns of the branch target. When determining if hbrps are
 2473 necessary, we look for only 32 inline instructions, because up to
 2474 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2475 new hbrps, we insert them within 4 and 16 insns of the target. */
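/* Worked out: the 32 insns scanned plus up to 12 nops plus 4 hbrps account
   for the 48-instruction window described above. */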
644459d0 2476static void
5a976006 2477insert_hbrp (void)
644459d0 2478{
5a976006 2479 rtx insn;
2480 if (TARGET_SAFE_HINTS)
644459d0 2481 {
5a976006 2482 shorten_branches (get_insns ());
2483 /* Insert hbrp at beginning of function */
2484 insn = next_active_insn (get_insns ());
2485 if (insn)
2486 insert_hbrp_for_ilb_runout (insn);
2487 /* Insert hbrp after hinted targets. */
2488 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2489 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2490 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2491 }
644459d0 2492}
2493
5a976006 2494static int in_spu_reorg;
2495
2496/* Insert branch hints. There are no branch optimizations after this
2497 pass, so it's safe to set our branch hints now. */
644459d0 2498static void
5a976006 2499spu_machine_dependent_reorg (void)
644459d0 2500{
5a976006 2501 sbitmap blocks;
2502 basic_block bb;
2503 rtx branch, insn;
2504 rtx branch_target = 0;
2505 int branch_addr = 0, insn_addr, required_dist = 0;
2506 int i;
2507 unsigned int j;
644459d0 2508
5a976006 2509 if (!TARGET_BRANCH_HINTS || optimize == 0)
2510 {
2511 /* We still do it for unoptimized code because an external
2512 function might have hinted a call or return. */
2513 insert_hbrp ();
2514 pad_bb ();
2515 return;
2516 }
644459d0 2517
5a976006 2518 blocks = sbitmap_alloc (last_basic_block);
2519 sbitmap_zero (blocks);
644459d0 2520
5a976006 2521 in_spu_reorg = 1;
2522 compute_bb_for_insn ();
2523
2524 compact_blocks ();
2525
2526 spu_bb_info =
2527 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2528 sizeof (struct spu_bb_info));
2529
2530 /* We need exact insn addresses and lengths. */
2531 shorten_branches (get_insns ());
2532
2533 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2534 {
5a976006 2535 bb = BASIC_BLOCK (i);
2536 branch = 0;
2537 if (spu_bb_info[i].prop_jump)
644459d0 2538 {
5a976006 2539 branch = spu_bb_info[i].prop_jump;
2540 branch_target = get_branch_target (branch);
2541 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2542 required_dist = spu_hint_dist;
2543 }
2544 /* Search from end of a block to beginning. In this loop, find
 2545 jumps which need a hint and emit the hint only when:
2546 - it's an indirect branch and we're at the insn which sets
2547 the register
2548 - we're at an insn that will invalidate the hint. e.g., a
2549 call, another hint insn, inline asm that clobbers $hbr, and
2550 some inlined operations (divmodsi4). Don't consider jumps
2551 because they are only at the end of a block and are
2552 considered when we are deciding whether to propagate
2553 - we're getting too far away from the branch. The hbr insns
2554 only have a signed 10 bit offset
2555 We go back as far as possible so the branch will be considered
2556 for propagation when we get to the beginning of the block. */
2557 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2558 {
2559 if (INSN_P (insn))
2560 {
2561 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2562 if (branch
2563 && ((GET_CODE (branch_target) == REG
2564 && set_of (branch_target, insn) != NULL_RTX)
2565 || insn_clobbers_hbr (insn)
2566 || branch_addr - insn_addr > 600))
2567 {
2568 rtx next = NEXT_INSN (insn);
2569 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2570 if (insn != BB_END (bb)
2571 && branch_addr - next_addr >= required_dist)
2572 {
2573 if (dump_file)
2574 fprintf (dump_file,
2575 "hint for %i in block %i before %i\n",
2576 INSN_UID (branch), bb->index,
2577 INSN_UID (next));
2578 spu_emit_branch_hint (next, branch, branch_target,
2579 branch_addr - next_addr, blocks);
2580 }
2581 branch = 0;
2582 }
2583
2584 /* JUMP_P will only be true at the end of a block. When
2585 branch is already set it means we've previously decided
2586 to propagate a hint for that branch into this block. */
2587 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2588 {
2589 branch = 0;
2590 if ((branch_target = get_branch_target (insn)))
2591 {
2592 branch = insn;
2593 branch_addr = insn_addr;
2594 required_dist = spu_hint_dist;
2595 }
2596 }
2597 }
2598 if (insn == BB_HEAD (bb))
2599 break;
2600 }
2601
2602 if (branch)
2603 {
2604 /* If we haven't emitted a hint for this branch yet, it might
2605 be profitable to emit it in one of the predecessor blocks,
2606 especially for loops. */
2607 rtx bbend;
2608 basic_block prev = 0, prop = 0, prev2 = 0;
2609 int loop_exit = 0, simple_loop = 0;
2610 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2611
2612 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2613 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2614 prev = EDGE_PRED (bb, j)->src;
2615 else
2616 prev2 = EDGE_PRED (bb, j)->src;
2617
2618 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2619 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2620 loop_exit = 1;
2621 else if (EDGE_SUCC (bb, j)->dest == bb)
2622 simple_loop = 1;
2623
2624 /* If this branch is a loop exit then propagate to previous
2625 fallthru block. This catches the cases when it is a simple
2626 loop or when there is an initial branch into the loop. */
2627 if (prev && (loop_exit || simple_loop)
2628 && prev->loop_depth <= bb->loop_depth)
2629 prop = prev;
2630
 2631 /* If there is only one adjacent predecessor, don't propagate
2632 outside this loop. This loop_depth test isn't perfect, but
2633 I'm not sure the loop_father member is valid at this point. */
2634 else if (prev && single_pred_p (bb)
2635 && prev->loop_depth == bb->loop_depth)
2636 prop = prev;
2637
2638 /* If this is the JOIN block of a simple IF-THEN then
 2639 propagate the hint to the HEADER block. */
2640 else if (prev && prev2
2641 && EDGE_COUNT (bb->preds) == 2
2642 && EDGE_COUNT (prev->preds) == 1
2643 && EDGE_PRED (prev, 0)->src == prev2
2644 && prev2->loop_depth == bb->loop_depth
2645 && GET_CODE (branch_target) != REG)
2646 prop = prev;
2647
2648 /* Don't propagate when:
2649 - this is a simple loop and the hint would be too far
2650 - this is not a simple loop and there are 16 insns in
2651 this block already
2652 - the predecessor block ends in a branch that will be
2653 hinted
2654 - the predecessor block ends in an insn that invalidates
2655 the hint */
2656 if (prop
2657 && prop->index >= 0
2658 && (bbend = BB_END (prop))
2659 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2660 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2661 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2662 {
2663 if (dump_file)
2664 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2665 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2666 bb->index, prop->index, bb->loop_depth,
2667 INSN_UID (branch), loop_exit, simple_loop,
2668 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2669
2670 spu_bb_info[prop->index].prop_jump = branch;
2671 spu_bb_info[prop->index].bb_index = i;
2672 }
2673 else if (branch_addr - next_addr >= required_dist)
2674 {
2675 if (dump_file)
2676 fprintf (dump_file, "hint for %i in block %i before %i\n",
2677 INSN_UID (branch), bb->index,
2678 INSN_UID (NEXT_INSN (insn)));
2679 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2680 branch_addr - next_addr, blocks);
2681 }
2682 branch = 0;
644459d0 2683 }
644459d0 2684 }
5a976006 2685 free (spu_bb_info);
644459d0 2686
5a976006 2687 if (!sbitmap_empty_p (blocks))
2688 find_many_sub_basic_blocks (blocks);
2689
2690 /* We have to schedule to make sure alignment is ok. */
2691 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2692
 2693 /* The hints need to be scheduled, so run the scheduler again. */
2694 schedule_insns ();
2695
2696 insert_hbrp ();
2697
2698 pad_bb ();
2699
2700
2701 if (spu_flag_var_tracking)
644459d0 2702 {
5a976006 2703 df_analyze ();
2704 timevar_push (TV_VAR_TRACKING);
2705 variable_tracking_main ();
2706 timevar_pop (TV_VAR_TRACKING);
2707 df_finish_pass (false);
644459d0 2708 }
5a976006 2709
2710 free_bb_for_insn ();
2711
2712 in_spu_reorg = 0;
644459d0 2713}
2714\f
2715
2716/* Insn scheduling routines, primarily for dual issue. */
2717static int
2718spu_sched_issue_rate (void)
2719{
2720 return 2;
2721}
2722
2723static int
5a976006 2724uses_ls_unit(rtx insn)
644459d0 2725{
5a976006 2726 rtx set = single_set (insn);
2727 if (set != 0
2728 && (GET_CODE (SET_DEST (set)) == MEM
2729 || GET_CODE (SET_SRC (set)) == MEM))
2730 return 1;
2731 return 0;
644459d0 2732}
2733
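/* Classify INSN by execution pipe: 0 for even-pipe (pipe 0) insns, 1 for
   odd-pipe (pipe 1) insns, -1 for inline asm, MULTI0 and ADDR_VEC insns, and
   -2 for converts (which presumably expand to no real code, matching how
   _spu_convert is treated as zero cost elsewhere in this file). */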
2734static int
2735get_pipe (rtx insn)
2736{
2737 enum attr_type t;
2738 /* Handle inline asm */
2739 if (INSN_CODE (insn) == -1)
2740 return -1;
2741 t = get_attr_type (insn);
2742 switch (t)
2743 {
2744 case TYPE_CONVERT:
2745 return -2;
2746 case TYPE_MULTI0:
2747 return -1;
2748
2749 case TYPE_FX2:
2750 case TYPE_FX3:
2751 case TYPE_SPR:
2752 case TYPE_NOP:
2753 case TYPE_FXB:
2754 case TYPE_FPD:
2755 case TYPE_FP6:
2756 case TYPE_FP7:
644459d0 2757 return 0;
2758
2759 case TYPE_LNOP:
2760 case TYPE_SHUF:
2761 case TYPE_LOAD:
2762 case TYPE_STORE:
2763 case TYPE_BR:
2764 case TYPE_MULTI1:
2765 case TYPE_HBR:
5a976006 2766 case TYPE_IPREFETCH:
644459d0 2767 return 1;
2768 default:
2769 abort ();
2770 }
2771}
2772
5a976006 2773
2774/* haifa-sched.c has a static variable that keeps track of the current
2775 cycle. It is passed to spu_sched_reorder, and we record it here for
2776 use by spu_sched_variable_issue. It won't be accurate if the
 2777 scheduler updates its clock_var between the two calls. */
2778static int clock_var;
2779
2780/* This is used to keep track of insn alignment. Set to 0 at the
2781 beginning of each block and increased by the "length" attr of each
2782 insn scheduled. */
2783static int spu_sched_length;
2784
2785/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2786 ready list appropriately in spu_sched_reorder(). */
2787static int pipe0_clock;
2788static int pipe1_clock;
2789
2790static int prev_clock_var;
2791
2792static int prev_priority;
2793
2794/* The SPU needs to load the next ilb sometime during the execution of
2795 the previous ilb. There is a potential conflict if every cycle has a
2796 load or store. To avoid the conflict we make sure the load/store
2797 unit is free for at least one cycle during the execution of insns in
2798 the previous ilb. */
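/* spu_ls_first records the spu_sched_length position of the first insn in
   the current run of back-to-back load/store cycles, so spu_sched_reorder
   can emit an hbrp instead of yet another load/store once such a run
   reaches 15 insns (4 * 15 bytes). */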
2799static int spu_ls_first;
2800static int prev_ls_clock;
2801
2802static void
2803spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2804 int max_ready ATTRIBUTE_UNUSED)
2805{
2806 spu_sched_length = 0;
2807}
2808
2809static void
2810spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2811 int max_ready ATTRIBUTE_UNUSED)
2812{
2813 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2814 {
2815 /* When any block might be at least 8-byte aligned, assume they
2816 will all be at least 8-byte aligned to make sure dual issue
2817 works out correctly. */
2818 spu_sched_length = 0;
2819 }
2820 spu_ls_first = INT_MAX;
2821 clock_var = -1;
2822 prev_ls_clock = -1;
2823 pipe0_clock = -1;
2824 pipe1_clock = -1;
2825 prev_clock_var = -1;
2826 prev_priority = -1;
2827}
2828
644459d0 2829static int
5a976006 2830spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2831 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2832{
5a976006 2833 int len;
2834 int p;
644459d0 2835 if (GET_CODE (PATTERN (insn)) == USE
2836 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2837 || (len = get_attr_length (insn)) == 0)
2838 return more;
2839
2840 spu_sched_length += len;
2841
2842 /* Reset on inline asm */
2843 if (INSN_CODE (insn) == -1)
2844 {
2845 spu_ls_first = INT_MAX;
2846 pipe0_clock = -1;
2847 pipe1_clock = -1;
2848 return 0;
2849 }
2850 p = get_pipe (insn);
2851 if (p == 0)
2852 pipe0_clock = clock_var;
2853 else
2854 pipe1_clock = clock_var;
2855
2856 if (in_spu_reorg)
2857 {
2858 if (clock_var - prev_ls_clock > 1
2859 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2860 spu_ls_first = INT_MAX;
2861 if (uses_ls_unit (insn))
2862 {
2863 if (spu_ls_first == INT_MAX)
2864 spu_ls_first = spu_sched_length;
2865 prev_ls_clock = clock_var;
2866 }
2867
2868 /* The scheduler hasn't inserted the nop, but we will later on.
2869 Include those nops in spu_sched_length. */
2870 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2871 spu_sched_length += 4;
2872 prev_clock_var = clock_var;
2873
2874 /* more is -1 when called from spu_sched_reorder for new insns
2875 that don't have INSN_PRIORITY */
2876 if (more >= 0)
2877 prev_priority = INSN_PRIORITY (insn);
2878 }
2879
 2880 /* Always try issuing more insns. spu_sched_reorder will decide
2881 when the cycle should be advanced. */
2882 return 1;
2883}
2884
2885/* This function is called for both TARGET_SCHED_REORDER and
2886 TARGET_SCHED_REORDER2. */
2887static int
2888spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2889 rtx *ready, int *nreadyp, int clock)
2890{
2891 int i, nready = *nreadyp;
2892 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2893 rtx insn;
2894
2895 clock_var = clock;
2896
2897 if (nready <= 0 || pipe1_clock >= clock)
2898 return 0;
2899
2900 /* Find any rtl insns that don't generate assembly insns and schedule
2901 them first. */
2902 for (i = nready - 1; i >= 0; i--)
2903 {
2904 insn = ready[i];
2905 if (INSN_CODE (insn) == -1
2906 || INSN_CODE (insn) == CODE_FOR_blockage
2907 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2908 {
2909 ready[i] = ready[nready - 1];
2910 ready[nready - 1] = insn;
2911 return 1;
2912 }
2913 }
2914
2915 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2916 for (i = 0; i < nready; i++)
2917 if (INSN_CODE (ready[i]) != -1)
2918 {
2919 insn = ready[i];
2920 switch (get_attr_type (insn))
2921 {
2922 default:
2923 case TYPE_MULTI0:
2924 case TYPE_CONVERT:
2925 case TYPE_FX2:
2926 case TYPE_FX3:
2927 case TYPE_SPR:
2928 case TYPE_NOP:
2929 case TYPE_FXB:
2930 case TYPE_FPD:
2931 case TYPE_FP6:
2932 case TYPE_FP7:
2933 pipe_0 = i;
2934 break;
2935 case TYPE_LOAD:
2936 case TYPE_STORE:
2937 pipe_ls = i;
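	  /* Fall through: loads and stores also issue on pipe 1. */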
2938 case TYPE_LNOP:
2939 case TYPE_SHUF:
2940 case TYPE_BR:
2941 case TYPE_MULTI1:
2942 case TYPE_HBR:
2943 pipe_1 = i;
2944 break;
2945 case TYPE_IPREFETCH:
2946 pipe_hbrp = i;
2947 break;
2948 }
2949 }
2950
2951 /* In the first scheduling phase, schedule loads and stores together
2952 to increase the chance they will get merged during postreload CSE. */
2953 if (!reload_completed && pipe_ls >= 0)
2954 {
2955 insn = ready[pipe_ls];
2956 ready[pipe_ls] = ready[nready - 1];
2957 ready[nready - 1] = insn;
2958 return 1;
2959 }
2960
2961 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2962 if (pipe_hbrp >= 0)
2963 pipe_1 = pipe_hbrp;
2964
2965 /* When we have loads/stores in every cycle of the last 15 insns and
2966 we are about to schedule another load/store, emit an hbrp insn
2967 instead. */
2968 if (in_spu_reorg
2969 && spu_sched_length - spu_ls_first >= 4 * 15
2970 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2971 {
2972 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2973 recog_memoized (insn);
2974 if (pipe0_clock < clock)
2975 PUT_MODE (insn, TImode);
2976 spu_sched_variable_issue (file, verbose, insn, -1);
2977 return 0;
2978 }
2979
2980 /* In general, we want to emit nops to increase dual issue, but dual
2981 issue isn't faster when one of the insns could be scheduled later
 2982 without affecting the critical path. We look at INSN_PRIORITY to
 2983 make a good guess, but it isn't perfect, so -mdual-nops=n can be
 2984 used to affect it. */
2985 if (in_spu_reorg && spu_dual_nops < 10)
2986 {
 2987 /* When we are at an even address and we are not issuing nops to
2988 improve scheduling then we need to advance the cycle. */
2989 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2990 && (spu_dual_nops == 0
2991 || (pipe_1 != -1
2992 && prev_priority >
2993 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2994 return 0;
2995
2996 /* When at an odd address, schedule the highest priority insn
2997 without considering pipeline. */
2998 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2999 && (spu_dual_nops == 0
3000 || (prev_priority >
3001 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3002 return 1;
3003 }
3004
3005
 3006 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3007 pipe0 insn in the ready list, schedule it. */
3008 if (pipe0_clock < clock && pipe_0 >= 0)
3009 schedule_i = pipe_0;
3010
3011 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3012 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3013 else
3014 schedule_i = pipe_1;
3015
3016 if (schedule_i > -1)
3017 {
3018 insn = ready[schedule_i];
3019 ready[schedule_i] = ready[nready - 1];
3020 ready[nready - 1] = insn;
3021 return 1;
3022 }
3023 return 0;
644459d0 3024}
3025
3026/* INSN is dependent on DEP_INSN. */
3027static int
5a976006 3028spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3029{
5a976006 3030 rtx set;
3031
3032 /* The blockage pattern is used to prevent instructions from being
3033 moved across it and has no cost. */
3034 if (INSN_CODE (insn) == CODE_FOR_blockage
3035 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3036 return 0;
3037
3038 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3039 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3040 return 0;
3041
3042 /* Make sure hbrps are spread out. */
3043 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3044 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3045 return 8;
3046
3047 /* Make sure hints and hbrps are 2 cycles apart. */
3048 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (insn) == CODE_FOR_hbr)
3050 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3051 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3052 return 2;
3053
3054 /* An hbrp has no real dependency on other insns. */
3055 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3056 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3057 return 0;
3058
3059 /* Assuming that it is unlikely an argument register will be used in
3060 the first cycle of the called function, we reduce the cost for
3061 slightly better scheduling of dep_insn. When not hinted, the
3062 mispredicted branch would hide the cost as well. */
3063 if (CALL_P (insn))
3064 {
3065 rtx target = get_branch_target (insn);
3066 if (GET_CODE (target) != REG || !set_of (target, insn))
3067 return cost - 2;
3068 return cost;
3069 }
3070
3071 /* And when returning from a function, let's assume the return values
3072 are completed sooner too. */
3073 if (CALL_P (dep_insn))
644459d0 3074 return cost - 2;
5a976006 3075
 3076 /* Make sure an instruction that loads from the back chain is scheduled
3077 away from the return instruction so a hint is more likely to get
3078 issued. */
3079 if (INSN_CODE (insn) == CODE_FOR__return
3080 && (set = single_set (dep_insn))
3081 && GET_CODE (SET_DEST (set)) == REG
3082 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3083 return 20;
3084
644459d0 3085 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3086 scheduler makes every insn in a block anti-dependent on the final
3087 jump_insn. We adjust here so higher cost insns will get scheduled
3088 earlier. */
5a976006 3089 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3090 return insn_cost (dep_insn) - 3;
5a976006 3091
644459d0 3092 return cost;
3093}
3094\f
3095/* Create a CONST_DOUBLE from a string. */
3096struct rtx_def *
3097spu_float_const (const char *string, enum machine_mode mode)
3098{
3099 REAL_VALUE_TYPE value;
3100 value = REAL_VALUE_ATOF (string, mode);
3101 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3102}
3103
644459d0 3104int
3105spu_constant_address_p (rtx x)
3106{
3107 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3108 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3109 || GET_CODE (x) == HIGH);
3110}
3111
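/* Classify which single immediate-load instruction can materialize VAL:
   SPU_IL for a sign-extended 16-bit value, SPU_ILA for an 18-bit unsigned
   value, SPU_ILH when the same 16-bit pattern repeats in both halfwords, and
   SPU_ILHU when only the upper halfword is nonzero. (The mnemonic gloss on
   il/ila/ilh/ilhu is an interpretive note; the range checks below are
   authoritative.) */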
3112static enum spu_immediate
3113which_immediate_load (HOST_WIDE_INT val)
3114{
3115 gcc_assert (val == trunc_int_for_mode (val, SImode));
3116
3117 if (val >= -0x8000 && val <= 0x7fff)
3118 return SPU_IL;
3119 if (val >= 0 && val <= 0x3ffff)
3120 return SPU_ILA;
3121 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3122 return SPU_ILH;
3123 if ((val & 0xffff) == 0)
3124 return SPU_ILHU;
3125
3126 return SPU_NONE;
3127}
3128
dea01258 3129/* Return true when OP can be loaded by one of the il instructions, or
3130 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3131int
3132immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3133{
3134 if (CONSTANT_P (op))
3135 {
3136 enum immediate_class c = classify_immediate (op, mode);
5df189be 3137 return c == IC_IL1 || c == IC_IL1s
3072d30e 3138 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3139 }
3140 return 0;
3141}
3142
 3143 /* Return true if the first SIZE bytes of ARR are a constant that can be
3144 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3145 represent the size and offset of the instruction to use. */
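/* Illustration, derived from the checks below rather than hardware docs:
   a word insert at byte offset 4 is represented as
   { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }, i.e. identity bytes
   i+16 everywhere except one naturally aligned run 0..run-1 of length 1, 2,
   4 or 8; that example yields *PRUN = 4 and *PSTART = 4. */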
3146static int
3147cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3148{
3149 int cpat, run, i, start;
3150 cpat = 1;
3151 run = 0;
3152 start = -1;
3153 for (i = 0; i < size && cpat; i++)
3154 if (arr[i] != i+16)
3155 {
3156 if (!run)
3157 {
3158 start = i;
3159 if (arr[i] == 3)
3160 run = 1;
3161 else if (arr[i] == 2 && arr[i+1] == 3)
3162 run = 2;
3163 else if (arr[i] == 0)
3164 {
3165 while (arr[i+run] == run && i+run < 16)
3166 run++;
3167 if (run != 4 && run != 8)
3168 cpat = 0;
3169 }
3170 else
3171 cpat = 0;
3172 if ((i & (run-1)) != 0)
3173 cpat = 0;
3174 i += run;
3175 }
3176 else
3177 cpat = 0;
3178 }
b01a6dc3 3179 if (cpat && (run || size < 16))
dea01258 3180 {
3181 if (run == 0)
3182 run = 1;
3183 if (prun)
3184 *prun = run;
3185 if (pstart)
3186 *pstart = start == -1 ? 16-run : start;
3187 return 1;
3188 }
3189 return 0;
3190}
3191
3192/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3193 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3194static enum immediate_class
3195classify_immediate (rtx op, enum machine_mode mode)
644459d0 3196{
3197 HOST_WIDE_INT val;
3198 unsigned char arr[16];
5df189be 3199 int i, j, repeated, fsmbi, repeat;
dea01258 3200
3201 gcc_assert (CONSTANT_P (op));
3202
644459d0 3203 if (GET_MODE (op) != VOIDmode)
3204 mode = GET_MODE (op);
3205
dea01258 3206 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3207 if (!flag_pic
3208 && mode == V4SImode
dea01258 3209 && GET_CODE (op) == CONST_VECTOR
3210 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3211 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3212 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3213 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3214 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3215 op = CONST_VECTOR_ELT (op, 0);
644459d0 3216
dea01258 3217 switch (GET_CODE (op))
3218 {
3219 case SYMBOL_REF:
3220 case LABEL_REF:
3221 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3222
dea01258 3223 case CONST:
0cfc65d4 3224 /* We can never know if the resulting address fits in 18 bits and can be
3225 loaded with ila. For now, assume the address will not overflow if
3226 the displacement is "small" (fits 'K' constraint). */
3227 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3228 {
3229 rtx sym = XEXP (XEXP (op, 0), 0);
3230 rtx cst = XEXP (XEXP (op, 0), 1);
3231
3232 if (GET_CODE (sym) == SYMBOL_REF
3233 && GET_CODE (cst) == CONST_INT
3234 && satisfies_constraint_K (cst))
3235 return IC_IL1s;
3236 }
3237 return IC_IL2s;
644459d0 3238
dea01258 3239 case HIGH:
3240 return IC_IL1s;
3241
3242 case CONST_VECTOR:
3243 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3244 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3245 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3246 return IC_POOL;
3247 /* Fall through. */
3248
3249 case CONST_INT:
3250 case CONST_DOUBLE:
3251 constant_to_array (mode, op, arr);
644459d0 3252
dea01258 3253 /* Check that each 4-byte slot is identical. */
3254 repeated = 1;
3255 for (i = 4; i < 16; i += 4)
3256 for (j = 0; j < 4; j++)
3257 if (arr[j] != arr[i + j])
3258 repeated = 0;
3259
3260 if (repeated)
3261 {
3262 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3263 val = trunc_int_for_mode (val, SImode);
3264
3265 if (which_immediate_load (val) != SPU_NONE)
3266 return IC_IL1;
3267 }
3268
3269 /* Any mode of 2 bytes or smaller can be loaded with an il
3270 instruction. */
3271 gcc_assert (GET_MODE_SIZE (mode) > 2);
3272
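  /* fsmbi can build any constant whose bytes take at most two values, one of
     them zero: when the nonzero value is 0xff a single fsmbi suffices
     (IC_FSMBI); otherwise classify as IC_FSMBI2, which presumably needs one
     more instruction to turn the 0xff bytes into the repeated value. */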
3273 fsmbi = 1;
5df189be 3274 repeat = 0;
dea01258 3275 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3276 if (arr[i] != 0 && repeat == 0)
3277 repeat = arr[i];
3278 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3279 fsmbi = 0;
3280 if (fsmbi)
5df189be 3281 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3282
3283 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3284 return IC_CPAT;
3285
3286 if (repeated)
3287 return IC_IL2;
3288
3289 return IC_POOL;
3290 default:
3291 break;
3292 }
3293 gcc_unreachable ();
644459d0 3294}
3295
3296static enum spu_immediate
3297which_logical_immediate (HOST_WIDE_INT val)
3298{
3299 gcc_assert (val == trunc_int_for_mode (val, SImode));
3300
3301 if (val >= -0x200 && val <= 0x1ff)
3302 return SPU_ORI;
3303 if (val >= 0 && val <= 0xffff)
3304 return SPU_IOHL;
3305 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3306 {
3307 val = trunc_int_for_mode (val, HImode);
3308 if (val >= -0x200 && val <= 0x1ff)
3309 return SPU_ORHI;
3310 if ((val & 0xff) == ((val >> 8) & 0xff))
3311 {
3312 val = trunc_int_for_mode (val, QImode);
3313 if (val >= -0x200 && val <= 0x1ff)
3314 return SPU_ORBI;
3315 }
3316 }
3317 return SPU_NONE;
3318}
3319
5df189be 3320/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3321 CONST_DOUBLEs. */
3322static int
3323const_vector_immediate_p (rtx x)
3324{
3325 int i;
3326 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3327 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3328 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3329 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3330 return 0;
3331 return 1;
3332}
3333
644459d0 3334int
3335logical_immediate_p (rtx op, enum machine_mode mode)
3336{
3337 HOST_WIDE_INT val;
3338 unsigned char arr[16];
3339 int i, j;
3340
3341 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3342 || GET_CODE (op) == CONST_VECTOR);
3343
5df189be 3344 if (GET_CODE (op) == CONST_VECTOR
3345 && !const_vector_immediate_p (op))
3346 return 0;
3347
644459d0 3348 if (GET_MODE (op) != VOIDmode)
3349 mode = GET_MODE (op);
3350
3351 constant_to_array (mode, op, arr);
3352
3353 /* Check that bytes are repeated. */
3354 for (i = 4; i < 16; i += 4)
3355 for (j = 0; j < 4; j++)
3356 if (arr[j] != arr[i + j])
3357 return 0;
3358
3359 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3360 val = trunc_int_for_mode (val, SImode);
3361
3362 i = which_logical_immediate (val);
3363 return i != SPU_NONE && i != SPU_IOHL;
3364}
3365
3366int
3367iohl_immediate_p (rtx op, enum machine_mode mode)
3368{
3369 HOST_WIDE_INT val;
3370 unsigned char arr[16];
3371 int i, j;
3372
3373 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3374 || GET_CODE (op) == CONST_VECTOR);
3375
5df189be 3376 if (GET_CODE (op) == CONST_VECTOR
3377 && !const_vector_immediate_p (op))
3378 return 0;
3379
644459d0 3380 if (GET_MODE (op) != VOIDmode)
3381 mode = GET_MODE (op);
3382
3383 constant_to_array (mode, op, arr);
3384
3385 /* Check that bytes are repeated. */
3386 for (i = 4; i < 16; i += 4)
3387 for (j = 0; j < 4; j++)
3388 if (arr[j] != arr[i + j])
3389 return 0;
3390
3391 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3392 val = trunc_int_for_mode (val, SImode);
3393
3394 return val >= 0 && val <= 0xffff;
3395}
3396
3397int
3398arith_immediate_p (rtx op, enum machine_mode mode,
3399 HOST_WIDE_INT low, HOST_WIDE_INT high)
3400{
3401 HOST_WIDE_INT val;
3402 unsigned char arr[16];
3403 int bytes, i, j;
3404
3405 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 || GET_CODE (op) == CONST_VECTOR);
3407
5df189be 3408 if (GET_CODE (op) == CONST_VECTOR
3409 && !const_vector_immediate_p (op))
3410 return 0;
3411
644459d0 3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3414
3415 constant_to_array (mode, op, arr);
3416
3417 if (VECTOR_MODE_P (mode))
3418 mode = GET_MODE_INNER (mode);
3419
3420 bytes = GET_MODE_SIZE (mode);
3421 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3422
3423 /* Check that bytes are repeated. */
3424 for (i = bytes; i < 16; i += bytes)
3425 for (j = 0; j < bytes; j++)
3426 if (arr[j] != arr[i + j])
3427 return 0;
3428
3429 val = arr[0];
3430 for (j = 1; j < bytes; j++)
3431 val = (val << 8) | arr[j];
3432
3433 val = trunc_int_for_mode (val, mode);
3434
3435 return val >= low && val <= high;
3436}
3437
3438/* We accept:
5b865faf 3439 - any 32-bit constant (SImode, SFmode)
644459d0 3440 - any constant that can be generated with fsmbi (any mode)
5b865faf 3441 - a 64-bit constant where the high and low bits are identical
644459d0 3442 (DImode, DFmode)
5b865faf 3443 - a 128-bit constant where the four 32-bit words match. */
644459d0 3444int
3445spu_legitimate_constant_p (rtx x)
3446{
5df189be 3447 if (GET_CODE (x) == HIGH)
3448 x = XEXP (x, 0);
644459d0 3449 /* V4SI with all identical symbols is valid. */
5df189be 3450 if (!flag_pic
3451 && GET_MODE (x) == V4SImode
644459d0 3452 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3453 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3454 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3455 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3456 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3457 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3458
5df189be 3459 if (GET_CODE (x) == CONST_VECTOR
3460 && !const_vector_immediate_p (x))
3461 return 0;
644459d0 3462 return 1;
3463}
3464
3465/* Valid addresses are:
3466 - symbol_ref, label_ref, const
3467 - reg
3468 - reg + const, where either reg or const is 16 byte aligned
3469 - reg + reg, alignment doesn't matter
3470 The alignment matters in the reg+const case because lqd and stqd
3471 ignore the 4 least significant bits of the const. (TODO: It might be
3472 preferable to allow any alignment and fix it up when splitting.) */
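/* Interpretive note: the reg + const range accepted below (-0x2000 ..
   0x1fff) roughly matches the reach of the signed 10-bit quadword offset of
   lqd/stqd (about +/- 8 KB); the checks below are authoritative. */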
3473int
3474spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3475 rtx x, int reg_ok_strict)
3476{
3477 if (mode == TImode && GET_CODE (x) == AND
3478 && GET_CODE (XEXP (x, 1)) == CONST_INT
3479 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3480 x = XEXP (x, 0);
3481 switch (GET_CODE (x))
3482 {
3483 case SYMBOL_REF:
3484 case LABEL_REF:
3485 return !TARGET_LARGE_MEM;
3486
3487 case CONST:
0cfc65d4 3488 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3489 {
3490 rtx sym = XEXP (XEXP (x, 0), 0);
3491 rtx cst = XEXP (XEXP (x, 0), 1);
3492
3493 /* Accept any symbol_ref + constant, assuming it does not
3494 wrap around the local store addressability limit. */
3495 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3496 return 1;
3497 }
3498 return 0;
644459d0 3499
3500 case CONST_INT:
3501 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3502
3503 case SUBREG:
3504 x = XEXP (x, 0);
3505 gcc_assert (GET_CODE (x) == REG);
3506
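      /* Fall through. */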
3507 case REG:
3508 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3509
3510 case PLUS:
3511 case LO_SUM:
3512 {
3513 rtx op0 = XEXP (x, 0);
3514 rtx op1 = XEXP (x, 1);
3515 if (GET_CODE (op0) == SUBREG)
3516 op0 = XEXP (op0, 0);
3517 if (GET_CODE (op1) == SUBREG)
3518 op1 = XEXP (op1, 0);
3519 /* We can't just accept any aligned register because CSE can
3520 change it to a register that is not marked aligned and then
3521 recog will fail. So we only accept frame registers because
3522 they will only be changed to other frame registers. */
3523 if (GET_CODE (op0) == REG
3524 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3525 && GET_CODE (op1) == CONST_INT
3526 && INTVAL (op1) >= -0x2000
3527 && INTVAL (op1) <= 0x1fff
5df189be 3528 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3529 return 1;
3530 if (GET_CODE (op0) == REG
3531 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3532 && GET_CODE (op1) == REG
3533 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3534 return 1;
3535 }
3536 break;
3537
3538 default:
3539 break;
3540 }
3541 return 0;
3542}
3543
3544/* When the address is reg + const_int, force the const_int into a
fa7637bd 3545 register. */
644459d0 3546rtx
3547spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3548 enum machine_mode mode)
3549{
3550 rtx op0, op1;
3551 /* Make sure both operands are registers. */
3552 if (GET_CODE (x) == PLUS)
3553 {
3554 op0 = XEXP (x, 0);
3555 op1 = XEXP (x, 1);
3556 if (ALIGNED_SYMBOL_REF_P (op0))
3557 {
3558 op0 = force_reg (Pmode, op0);
3559 mark_reg_pointer (op0, 128);
3560 }
3561 else if (GET_CODE (op0) != REG)
3562 op0 = force_reg (Pmode, op0);
3563 if (ALIGNED_SYMBOL_REF_P (op1))
3564 {
3565 op1 = force_reg (Pmode, op1);
3566 mark_reg_pointer (op1, 128);
3567 }
3568 else if (GET_CODE (op1) != REG)
3569 op1 = force_reg (Pmode, op1);
3570 x = gen_rtx_PLUS (Pmode, op0, op1);
3571 if (spu_legitimate_address (mode, x, 0))
3572 return x;
3573 }
3574 return NULL_RTX;
3575}
3576
3577/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3578 struct attribute_spec.handler. */
3579static tree
3580spu_handle_fndecl_attribute (tree * node,
3581 tree name,
3582 tree args ATTRIBUTE_UNUSED,
3583 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3584{
3585 if (TREE_CODE (*node) != FUNCTION_DECL)
3586 {
3587 warning (0, "`%s' attribute only applies to functions",
3588 IDENTIFIER_POINTER (name));
3589 *no_add_attrs = true;
3590 }
3591
3592 return NULL_TREE;
3593}
3594
3595/* Handle the "vector" attribute. */
3596static tree
3597spu_handle_vector_attribute (tree * node, tree name,
3598 tree args ATTRIBUTE_UNUSED,
3599 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3600{
3601 tree type = *node, result = NULL_TREE;
3602 enum machine_mode mode;
3603 int unsigned_p;
3604
3605 while (POINTER_TYPE_P (type)
3606 || TREE_CODE (type) == FUNCTION_TYPE
3607 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3608 type = TREE_TYPE (type);
3609
3610 mode = TYPE_MODE (type);
3611
3612 unsigned_p = TYPE_UNSIGNED (type);
3613 switch (mode)
3614 {
3615 case DImode:
3616 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3617 break;
3618 case SImode:
3619 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3620 break;
3621 case HImode:
3622 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3623 break;
3624 case QImode:
3625 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3626 break;
3627 case SFmode:
3628 result = V4SF_type_node;
3629 break;
3630 case DFmode:
3631 result = V2DF_type_node;
3632 break;
3633 default:
3634 break;
3635 }
3636
3637 /* Propagate qualifiers attached to the element type
3638 onto the vector type. */
3639 if (result && result != type && TYPE_QUALS (type))
3640 result = build_qualified_type (result, TYPE_QUALS (type));
3641
3642 *no_add_attrs = true; /* No need to hang on to the attribute. */
3643
3644 if (!result)
3645 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3646 else
d991e6e8 3647 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3648
3649 return NULL_TREE;
3650}
3651
f2b32076 3652/* Return nonzero if FUNC is a naked function. */
644459d0 3653static int
3654spu_naked_function_p (tree func)
3655{
3656 tree a;
3657
3658 if (TREE_CODE (func) != FUNCTION_DECL)
3659 abort ();
3660
3661 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3662 return a != NULL_TREE;
3663}
3664
3665int
3666spu_initial_elimination_offset (int from, int to)
3667{
3668 int saved_regs_size = spu_saved_regs_size ();
3669 int sp_offset = 0;
abe32cce 3670 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3671 || get_frame_size () || saved_regs_size)
3672 sp_offset = STACK_POINTER_OFFSET;
3673 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3674 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3675 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3676 return get_frame_size ();
644459d0 3677 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3678 return sp_offset + crtl->outgoing_args_size
644459d0 3679 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3680 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3681 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3682 else
3683 gcc_unreachable ();
644459d0 3684}
3685
3686rtx
fb80456a 3687spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3688{
3689 enum machine_mode mode = TYPE_MODE (type);
3690 int byte_size = ((mode == BLKmode)
3691 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3692
3693 /* Make sure small structs are left justified in a register. */
3694 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3695 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3696 {
3697 enum machine_mode smode;
3698 rtvec v;
3699 int i;
3700 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3701 int n = byte_size / UNITS_PER_WORD;
3702 v = rtvec_alloc (nregs);
3703 for (i = 0; i < n; i++)
3704 {
3705 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3706 gen_rtx_REG (TImode,
3707 FIRST_RETURN_REGNUM
3708 + i),
3709 GEN_INT (UNITS_PER_WORD * i));
3710 byte_size -= UNITS_PER_WORD;
3711 }
3712
3713 if (n < nregs)
3714 {
3715 if (byte_size < 4)
3716 byte_size = 4;
3717 smode =
3718 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3719 RTVEC_ELT (v, n) =
3720 gen_rtx_EXPR_LIST (VOIDmode,
3721 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3722 GEN_INT (UNITS_PER_WORD * n));
3723 }
3724 return gen_rtx_PARALLEL (mode, v);
3725 }
3726 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3727}
3728
3729rtx
3730spu_function_arg (CUMULATIVE_ARGS cum,
3731 enum machine_mode mode,
3732 tree type, int named ATTRIBUTE_UNUSED)
3733{
3734 int byte_size;
3735
3736 if (cum >= MAX_REGISTER_ARGS)
3737 return 0;
3738
3739 byte_size = ((mode == BLKmode)
3740 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3741
3742 /* The ABI does not allow parameters to be passed partially in
3743 reg and partially in stack. */
3744 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3745 return 0;
3746
3747 /* Make sure small structs are left justified in a register. */
3748 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3749 && byte_size < UNITS_PER_WORD && byte_size > 0)
3750 {
3751 enum machine_mode smode;
3752 rtx gr_reg;
3753 if (byte_size < 4)
3754 byte_size = 4;
3755 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3756 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3757 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3758 const0_rtx);
3759 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3760 }
3761 else
3762 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3763}
3764
3765/* Variable sized types are passed by reference. */
3766static bool
3767spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3768 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3769 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3770{
3771 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3772}
3773\f
3774
3775/* Var args. */
3776
3777/* Create and return the va_list datatype.
3778
3779 On SPU, va_list is an array type equivalent to
3780
3781 typedef struct __va_list_tag
3782 {
3783 void *__args __attribute__((__aligned(16)));
3784 void *__skip __attribute__((__aligned(16)));
3785
3786 } va_list[1];
3787
fa7637bd 3788 where __args points to the arg that will be returned by the next
644459d0 3789 va_arg(), and __skip points to the previous stack frame such that
3790 when __args == __skip we should advance __args by 32 bytes. */
3791static tree
3792spu_build_builtin_va_list (void)
3793{
3794 tree f_args, f_skip, record, type_decl;
3795 bool owp;
3796
3797 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3798
3799 type_decl =
3800 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3801
3802 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3803 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3804
3805 DECL_FIELD_CONTEXT (f_args) = record;
3806 DECL_ALIGN (f_args) = 128;
3807 DECL_USER_ALIGN (f_args) = 1;
3808
3809 DECL_FIELD_CONTEXT (f_skip) = record;
3810 DECL_ALIGN (f_skip) = 128;
3811 DECL_USER_ALIGN (f_skip) = 1;
3812
3813 TREE_CHAIN (record) = type_decl;
3814 TYPE_NAME (record) = type_decl;
3815 TYPE_FIELDS (record) = f_args;
3816 TREE_CHAIN (f_args) = f_skip;
3817
3818   /* We know this type gets padded, and that is what we want.  It is an
3819      internal type, so hide the padding warnings from the user.  */
3820 owp = warn_padded;
3821 warn_padded = false;
3822
3823 layout_type (record);
3824
3825 warn_padded = owp;
3826
3827 /* The correct type is an array type of one element. */
3828 return build_array_type (record, build_index_type (size_zero_node));
3829}
3830
3831/* Implement va_start by filling the va_list structure VALIST.
3832 NEXTARG points to the first anonymous stack argument.
3833
3834 The following global variables are used to initialize
3835 the va_list structure:
3836
abe32cce 3837 crtl->args.info;
644459d0 3838 the CUMULATIVE_ARGS for this function
3839
abe32cce 3840 crtl->args.arg_offset_rtx:
644459d0 3841 holds the offset of the first anonymous stack argument
3842 (relative to the virtual arg pointer). */
3843
8a58ed0a 3844static void
644459d0 3845spu_va_start (tree valist, rtx nextarg)
3846{
3847 tree f_args, f_skip;
3848 tree args, skip, t;
3849
3850 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3851 f_skip = TREE_CHAIN (f_args);
3852
3853 valist = build_va_arg_indirect_ref (valist);
3854 args =
3855 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3856 skip =
3857 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3858
3859 /* Find the __args area. */
3860 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3861 if (crtl->args.pretend_args_size > 0)
0de36bdb 3862 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3863 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3864 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3865 TREE_SIDE_EFFECTS (t) = 1;
3866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3867
3868 /* Find the __skip area. */
3869 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3870 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3871 size_int (crtl->args.pretend_args_size
0de36bdb 3872 - STACK_POINTER_OFFSET));
75a70cf9 3873 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3874 TREE_SIDE_EFFECTS (t) = 1;
3875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3876}
3877
3878/* Gimplify va_arg by updating the va_list structure
3879 VALIST as required to retrieve an argument of type
3880 TYPE, and returning that argument.
3881
3882 ret = va_arg(VALIST, TYPE);
3883
3884 generates code equivalent to:
3885
3886 paddedsize = (sizeof(TYPE) + 15) & -16;
3887 if (VALIST.__args + paddedsize > VALIST.__skip
3888 && VALIST.__args <= VALIST.__skip)
3889 addr = VALIST.__skip + 32;
3890 else
3891 addr = VALIST.__args;
3892 VALIST.__args = addr + paddedsize;
3893 ret = *(TYPE *)addr;
3894 */
3895static tree
75a70cf9 3896spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3897 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3898{
3899 tree f_args, f_skip;
3900 tree args, skip;
3901 HOST_WIDE_INT size, rsize;
3902 tree paddedsize, addr, tmp;
3903 bool pass_by_reference_p;
3904
3905 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3906 f_skip = TREE_CHAIN (f_args);
3907
3908 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3909 args =
3910 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3911 skip =
3912 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3913
3914 addr = create_tmp_var (ptr_type_node, "va_arg");
3915 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3916
3917 /* if an object is dynamically sized, a pointer to it is passed
3918 instead of the object itself. */
3919 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3920 false);
3921 if (pass_by_reference_p)
3922 type = build_pointer_type (type);
3923 size = int_size_in_bytes (type);
3924 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3925
3926 /* build conditional expression to calculate addr. The expression
3927 will be gimplified later. */
0de36bdb 3928 paddedsize = size_int (rsize);
75a70cf9 3929 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3930 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3931 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3932 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3933 unshare_expr (skip)));
644459d0 3934
3935 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3936 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3937 size_int (32)), unshare_expr (args));
644459d0 3938
75a70cf9 3939 gimplify_assign (addr, tmp, pre_p);
644459d0 3940
3941 /* update VALIST.__args */
0de36bdb 3942 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3943 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3944
3945 addr = fold_convert (build_pointer_type (type), addr);
3946
3947 if (pass_by_reference_p)
3948 addr = build_va_arg_indirect_ref (addr);
3949
3950 return build_va_arg_indirect_ref (addr);
3951}
3952
3953/* Save parameter registers starting with the register that corresponds
3954   to the first unnamed parameter.  If the first unnamed parameter is
3955   on the stack then save no registers.  Set pretend_args_size to the
3956 amount of space needed to save the registers. */
3957void
3958spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3959 tree type, int *pretend_size, int no_rtl)
3960{
3961 if (!no_rtl)
3962 {
3963 rtx tmp;
3964 int regno;
3965 int offset;
3966 int ncum = *cum;
3967
3968       /* CUM currently points to the last named argument; we want to
3969          start at the next argument.  */
3970 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3971
3972 offset = -STACK_POINTER_OFFSET;
3973 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3974 {
3975 tmp = gen_frame_mem (V4SImode,
3976 plus_constant (virtual_incoming_args_rtx,
3977 offset));
3978 emit_move_insn (tmp,
3979 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3980 offset += 16;
3981 }
3982 *pretend_size = offset + STACK_POINTER_OFFSET;
3983 }
3984}
3985\f
3986void
3987spu_conditional_register_usage (void)
3988{
3989 if (flag_pic)
3990 {
3991 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3992 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3993 }
644459d0 3994}
3995
3996/* This is called to decide when we can simplify a load instruction. We
3997 must only return true for registers which we know will always be
3998   aligned, taking into account that CSE might replace this reg with
3999   another one that has not been marked aligned.
4000 So this is really only true for frame, stack and virtual registers,
fa7637bd 4001   which we know are always aligned and should not be adversely affected
4002 by CSE. */
644459d0 4003static int
4004regno_aligned_for_load (int regno)
4005{
4006 return regno == FRAME_POINTER_REGNUM
5df189be 4007 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4008 || regno == ARG_POINTER_REGNUM
644459d0 4009 || regno == STACK_POINTER_REGNUM
5df189be 4010 || (regno >= FIRST_VIRTUAL_REGISTER
4011 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4012}
4013
4014/* Return TRUE when mem is known to be 16-byte aligned. */
4015int
4016aligned_mem_p (rtx mem)
4017{
4018 if (MEM_ALIGN (mem) >= 128)
4019 return 1;
4020 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4021 return 1;
4022 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4023 {
4024 rtx p0 = XEXP (XEXP (mem, 0), 0);
4025 rtx p1 = XEXP (XEXP (mem, 0), 1);
4026 if (regno_aligned_for_load (REGNO (p0)))
4027 {
4028 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4029 return 1;
4030 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4031 return 1;
4032 }
4033 }
4034 else if (GET_CODE (XEXP (mem, 0)) == REG)
4035 {
4036 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4037 return 1;
4038 }
4039 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4040 return 1;
4041 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4042 {
4043 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4044 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4045 if (GET_CODE (p0) == SYMBOL_REF
4046 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4047 return 1;
4048 }
4049 return 0;
4050}
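/* Illustrative examples of the rules above (a sketch, not exhaustive): a MEM
   whose address is (plus frame_pointer (const_int 32)) is treated as aligned,
   because the base register is always aligned and the offset is a multiple
   of 16; (plus frame_pointer pseudo_reg) is treated as aligned only if the
   pseudo is itself one of the always-aligned registers.  */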
4051
69ced2d6 4052/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4053 into its SYMBOL_REF_FLAGS. */
4054static void
4055spu_encode_section_info (tree decl, rtx rtl, int first)
4056{
4057 default_encode_section_info (decl, rtl, first);
4058
4059 /* If a variable has a forced alignment to < 16 bytes, mark it with
4060 SYMBOL_FLAG_ALIGN1. */
4061 if (TREE_CODE (decl) == VAR_DECL
4062 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4063 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4064}
4065
644459d0 4066/* Return TRUE if we are certain the mem refers to a complete object
4067 which is both 16-byte aligned and padded to a 16-byte boundary. This
4068 would make it safe to store with a single instruction.
4069 We guarantee the alignment and padding for static objects by aligning
4070 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4071 FIXME: We currently cannot guarantee this for objects on the stack
4072 because assign_parm_setup_stack calls assign_stack_local with the
4073 alignment of the parameter mode and in that case the alignment never
4074 gets adjusted by LOCAL_ALIGNMENT. */
4075static int
4076store_with_one_insn_p (rtx mem)
4077{
4078 rtx addr = XEXP (mem, 0);
4079 if (GET_MODE (mem) == BLKmode)
4080 return 0;
4081 /* Only static objects. */
4082 if (GET_CODE (addr) == SYMBOL_REF)
4083 {
4084 /* We use the associated declaration to make sure the access is
fa7637bd 4085 referring to the whole object.
644459d0 4086	 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4087 if it is necessary. Will there be cases where one exists, and
4088 the other does not? Will there be cases where both exist, but
4089 have different types? */
4090 tree decl = MEM_EXPR (mem);
4091 if (decl
4092 && TREE_CODE (decl) == VAR_DECL
4093 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4094 return 1;
4095 decl = SYMBOL_REF_DECL (addr);
4096 if (decl
4097 && TREE_CODE (decl) == VAR_DECL
4098 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4099 return 1;
4100 }
4101 return 0;
4102}
4103
4104int
4105spu_expand_mov (rtx * ops, enum machine_mode mode)
4106{
4107 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4108 abort ();
4109
4110 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4111 {
4112 rtx from = SUBREG_REG (ops[1]);
4113 enum machine_mode imode = GET_MODE (from);
4114
4115 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4116 && GET_MODE_CLASS (imode) == MODE_INT
4117 && subreg_lowpart_p (ops[1]));
4118
4119 if (GET_MODE_SIZE (imode) < 4)
4120 {
4121 from = gen_rtx_SUBREG (SImode, from, 0);
4122 imode = SImode;
4123 }
4124
4125 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4126 {
99bdde56 4127 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4128 emit_insn (GEN_FCN (icode) (ops[0], from));
4129 }
4130 else
4131 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4132 return 1;
4133 }
4134
4135 /* At least one of the operands needs to be a register. */
4136 if ((reload_in_progress | reload_completed) == 0
4137 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4138 {
4139 rtx temp = force_reg (mode, ops[1]);
4140 emit_move_insn (ops[0], temp);
4141 return 1;
4142 }
4143 if (reload_in_progress || reload_completed)
4144 {
dea01258 4145 if (CONSTANT_P (ops[1]))
4146 return spu_split_immediate (ops);
644459d0 4147 return 0;
4148 }
4149 else
4150 {
4151 if (GET_CODE (ops[0]) == MEM)
4152 {
4153 if (!spu_valid_move (ops))
4154 {
4155 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4156 gen_reg_rtx (TImode)));
4157 return 1;
4158 }
4159 }
4160 else if (GET_CODE (ops[1]) == MEM)
4161 {
4162 if (!spu_valid_move (ops))
4163 {
4164 emit_insn (gen_load
4165 (ops[0], ops[1], gen_reg_rtx (TImode),
4166 gen_reg_rtx (SImode)));
4167 return 1;
4168 }
4169 }
4170 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4171 extend them. */
4172 if (GET_CODE (ops[1]) == CONST_INT)
4173 {
4174 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4175 if (val != INTVAL (ops[1]))
4176 {
4177 emit_move_insn (ops[0], GEN_INT (val));
4178 return 1;
4179 }
4180 }
4181 }
4182 return 0;
4183}
4184
644459d0 4185void
4186spu_split_load (rtx * ops)
4187{
4188 enum machine_mode mode = GET_MODE (ops[0]);
4189 rtx addr, load, rot, mem, p0, p1;
4190 int rot_amt;
4191
4192 addr = XEXP (ops[1], 0);
4193
4194 rot = 0;
4195 rot_amt = 0;
4196 if (GET_CODE (addr) == PLUS)
4197 {
4198 /* 8 cases:
4199 aligned reg + aligned reg => lqx
4200 aligned reg + unaligned reg => lqx, rotqby
4201 aligned reg + aligned const => lqd
4202 aligned reg + unaligned const => lqd, rotqbyi
4203 unaligned reg + aligned reg => lqx, rotqby
4204 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4205 unaligned reg + aligned const => lqd, rotqby
4206 unaligned reg + unaligned const -> not allowed by legitimate address
4207 */
4208 p0 = XEXP (addr, 0);
4209 p1 = XEXP (addr, 1);
aa71ecd4 4210 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4211 {
aa71ecd4 4212 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4213 {
4214 emit_insn (gen_addsi3 (ops[3], p0, p1));
4215 rot = ops[3];
4216 }
4217 else
4218 rot = p0;
4219 }
4220 else
4221 {
4222 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4223 {
4224 rot_amt = INTVAL (p1) & 15;
4225 p1 = GEN_INT (INTVAL (p1) & -16);
4226 addr = gen_rtx_PLUS (SImode, p0, p1);
4227 }
aa71ecd4 4228 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4229 rot = p1;
4230 }
4231 }
4232 else if (GET_CODE (addr) == REG)
4233 {
aa71ecd4 4234 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4235 rot = addr;
4236 }
4237 else if (GET_CODE (addr) == CONST)
4238 {
4239 if (GET_CODE (XEXP (addr, 0)) == PLUS
4240 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4241 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4242 {
4243 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4244 if (rot_amt & -16)
4245 addr = gen_rtx_CONST (Pmode,
4246 gen_rtx_PLUS (Pmode,
4247 XEXP (XEXP (addr, 0), 0),
4248 GEN_INT (rot_amt & -16)));
4249 else
4250 addr = XEXP (XEXP (addr, 0), 0);
4251 }
4252 else
4253 rot = addr;
4254 }
4255 else if (GET_CODE (addr) == CONST_INT)
4256 {
4257 rot_amt = INTVAL (addr);
4258 addr = GEN_INT (rot_amt & -16);
4259 }
4260 else if (!ALIGNED_SYMBOL_REF_P (addr))
4261 rot = addr;
4262
4263 if (GET_MODE_SIZE (mode) < 4)
4264 rot_amt += GET_MODE_SIZE (mode) - 4;
4265
4266 rot_amt &= 15;
4267
4268 if (rot && rot_amt)
4269 {
4270 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4271 rot = ops[3];
4272 rot_amt = 0;
4273 }
4274
4275 load = ops[2];
4276
4277 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4278 mem = change_address (ops[1], TImode, addr);
4279
e04cf423 4280 emit_insn (gen_movti (load, mem));
644459d0 4281
4282 if (rot)
4283 emit_insn (gen_rotqby_ti (load, load, rot));
4284 else if (rot_amt)
4285 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4286
4287 if (reload_completed)
4288 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4289 else
4290 emit_insn (gen_spu_convert (ops[0], load));
4291}
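/* A rough sketch of what the expansion above produces for an SImode load
   from an unaligned register address R (illustrative only, not verified
   compiler output):

     lqx/lqd  T, R          ; load the 16-byte-aligned quadword containing R
     rotqby   T, T, R       ; rotate the addressed word into the preferred slot
     <spu_convert>          ; reinterpret the TImode temporary as SImode

   i.e. the scalar is obtained by loading the whole quadword and rotating,
   never by a narrower memory access.  */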
4292
4293void
4294spu_split_store (rtx * ops)
4295{
4296 enum machine_mode mode = GET_MODE (ops[0]);
4297 rtx pat = ops[2];
4298 rtx reg = ops[3];
4299 rtx addr, p0, p1, p1_lo, smem;
4300 int aform;
4301 int scalar;
4302
4303 addr = XEXP (ops[0], 0);
4304
4305 if (GET_CODE (addr) == PLUS)
4306 {
4307 /* 8 cases:
4308 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4309 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4310 aligned reg + aligned const => lqd, c?d, shuf, stqx
4311 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4312 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4313 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4314 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4315 unaligned reg + unaligned const -> not allowed by legitimate address
4316 */
4317 aform = 0;
4318 p0 = XEXP (addr, 0);
4319 p1 = p1_lo = XEXP (addr, 1);
4320 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4321 {
4322 p1_lo = GEN_INT (INTVAL (p1) & 15);
4323 p1 = GEN_INT (INTVAL (p1) & -16);
4324 addr = gen_rtx_PLUS (SImode, p0, p1);
4325 }
4326 }
4327 else if (GET_CODE (addr) == REG)
4328 {
4329 aform = 0;
4330 p0 = addr;
4331 p1 = p1_lo = const0_rtx;
4332 }
4333 else
4334 {
4335 aform = 1;
4336 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4337 p1 = 0; /* aform doesn't use p1 */
4338 p1_lo = addr;
4339 if (ALIGNED_SYMBOL_REF_P (addr))
4340 p1_lo = const0_rtx;
4341 else if (GET_CODE (addr) == CONST)
4342 {
4343 if (GET_CODE (XEXP (addr, 0)) == PLUS
4344 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4345 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4346 {
4347 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4348 if ((v & -16) != 0)
4349 addr = gen_rtx_CONST (Pmode,
4350 gen_rtx_PLUS (Pmode,
4351 XEXP (XEXP (addr, 0), 0),
4352 GEN_INT (v & -16)));
4353 else
4354 addr = XEXP (XEXP (addr, 0), 0);
4355 p1_lo = GEN_INT (v & 15);
4356 }
4357 }
4358 else if (GET_CODE (addr) == CONST_INT)
4359 {
4360 p1_lo = GEN_INT (INTVAL (addr) & 15);
4361 addr = GEN_INT (INTVAL (addr) & -16);
4362 }
4363 }
4364
e04cf423 4365 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4366
644459d0 4367 scalar = store_with_one_insn_p (ops[0]);
4368 if (!scalar)
4369 {
4370      /* We could copy the flags from the ops[0] MEM to the load MEM (lmem)
4371	 here, but we don't, because we want this load to be optimized away
4372	 if possible, and copying the flags would prevent that in certain
4373	 cases, e.g. consider the volatile flag.  */
4374
e04cf423 4375 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4376 set_mem_alias_set (lmem, 0);
4377 emit_insn (gen_movti (reg, lmem));
644459d0 4378
aa71ecd4 4379 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4380 p0 = stack_pointer_rtx;
4381 if (!p1_lo)
4382 p1_lo = const0_rtx;
4383
4384 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4385 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4386 }
4387 else if (reload_completed)
4388 {
4389 if (GET_CODE (ops[1]) == REG)
4390 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4391 else if (GET_CODE (ops[1]) == SUBREG)
4392 emit_move_insn (reg,
4393 gen_rtx_REG (GET_MODE (reg),
4394 REGNO (SUBREG_REG (ops[1]))));
4395 else
4396 abort ();
4397 }
4398 else
4399 {
4400 if (GET_CODE (ops[1]) == REG)
4401 emit_insn (gen_spu_convert (reg, ops[1]));
4402 else if (GET_CODE (ops[1]) == SUBREG)
4403 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4404 else
4405 abort ();
4406 }
4407
4408 if (GET_MODE_SIZE (mode) < 4 && scalar)
4409 emit_insn (gen_shlqby_ti
4410 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4411
644459d0 4412 smem = change_address (ops[0], TImode, addr);
4413 /* We can't use the previous alias set because the memory has changed
4414 size and can potentially overlap objects of other types. */
4415 set_mem_alias_set (smem, 0);
4416
e04cf423 4417 emit_insn (gen_movti (smem, reg));
644459d0 4418}
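/* A rough sketch of the unaligned (non-scalar) store path above, which is a
   read-modify-write of the containing quadword (illustrative only, not
   verified compiler output):

     lqd    T, addr          ; load the existing quadword
     cwd    P, addr          ; cbd/chd/cwd/cdd: build an insertion mask
     shufb  T, value, T, P   ; merge the new value into the quadword
     stqd   T, addr          ; store the quadword back  */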
4419
4420/* Return TRUE if X is MEM which is a struct member reference
4421 and the member can safely be loaded and stored with a single
4422 instruction because it is padded. */
4423static int
4424mem_is_padded_component_ref (rtx x)
4425{
4426 tree t = MEM_EXPR (x);
4427 tree r;
4428 if (!t || TREE_CODE (t) != COMPONENT_REF)
4429 return 0;
4430 t = TREE_OPERAND (t, 1);
4431 if (!t || TREE_CODE (t) != FIELD_DECL
4432 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4433 return 0;
4434 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4435 r = DECL_FIELD_CONTEXT (t);
4436 if (!r || TREE_CODE (r) != RECORD_TYPE)
4437 return 0;
4438 /* Make sure they are the same mode */
4439 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4440 return 0;
4441 /* If there are no following fields then the field alignment assures
fa7637bd 4442 the structure is padded to the alignment which means this field is
4443 padded too. */
644459d0 4444 if (TREE_CHAIN (t) == 0)
4445 return 1;
4446 /* If the following field is also aligned then this field will be
4447 padded. */
4448 t = TREE_CHAIN (t);
4449 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4450 return 1;
4451 return 0;
4452}
4453
c7b91b14 4454/* Parse the -mfixed-range= option string. */
4455static void
4456fix_range (const char *const_str)
4457{
4458 int i, first, last;
4459 char *str, *dash, *comma;
4460
4461   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4462 REG2 are either register names or register numbers. The effect
4463 of this option is to mark the registers in the range from REG1 to
4464 REG2 as ``fixed'' so they won't be used by the compiler. */
4465
4466 i = strlen (const_str);
4467 str = (char *) alloca (i + 1);
4468 memcpy (str, const_str, i + 1);
4469
4470 while (1)
4471 {
4472 dash = strchr (str, '-');
4473 if (!dash)
4474 {
4475 warning (0, "value of -mfixed-range must have form REG1-REG2");
4476 return;
4477 }
4478 *dash = '\0';
4479 comma = strchr (dash + 1, ',');
4480 if (comma)
4481 *comma = '\0';
4482
4483 first = decode_reg_name (str);
4484 if (first < 0)
4485 {
4486 warning (0, "unknown register name: %s", str);
4487 return;
4488 }
4489
4490 last = decode_reg_name (dash + 1);
4491 if (last < 0)
4492 {
4493 warning (0, "unknown register name: %s", dash + 1);
4494 return;
4495 }
4496
4497 *dash = '-';
4498
4499 if (first > last)
4500 {
4501 warning (0, "%s-%s is an empty range", str, dash + 1);
4502 return;
4503 }
4504
4505 for (i = first; i <= last; ++i)
4506 fixed_regs[i] = call_used_regs[i] = 1;
4507
4508 if (!comma)
4509 break;
4510
4511 *comma = ',';
4512 str = comma + 1;
4513 }
4514}
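/* Hypothetical usage example (the register spelling is an assumption):

     -mfixed-range=$80-$90,$100-$110

   would mark $80..$90 and $100..$110 as fixed and call-used, so the register
   allocator never assigns anything to them.  */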
4515
644459d0 4516int
4517spu_valid_move (rtx * ops)
4518{
4519 enum machine_mode mode = GET_MODE (ops[0]);
4520 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4521 return 0;
4522
4523 /* init_expr_once tries to recog against load and store insns to set
4524 the direct_load[] and direct_store[] arrays. We always want to
4525 consider those loads and stores valid. init_expr_once is called in
4526 the context of a dummy function which does not have a decl. */
4527 if (cfun->decl == 0)
4528 return 1;
4529
4530   /* Don't allow loads/stores which would require more than 1 insn.
4531 During and after reload we assume loads and stores only take 1
4532 insn. */
4533 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4534 {
4535 if (GET_CODE (ops[0]) == MEM
4536 && (GET_MODE_SIZE (mode) < 4
4537 || !(store_with_one_insn_p (ops[0])
4538 || mem_is_padded_component_ref (ops[0]))))
4539 return 0;
4540 if (GET_CODE (ops[1]) == MEM
4541 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4542 return 0;
4543 }
4544 return 1;
4545}
4546
4547/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4548 can be generated using the fsmbi instruction. */
4549int
4550fsmbi_const_p (rtx x)
4551{
dea01258 4552 if (CONSTANT_P (x))
4553 {
5df189be 4554 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4555 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4556 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4557 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4558 }
4559 return 0;
4560}
4561
4562/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4563 can be generated using the cbd, chd, cwd or cdd instruction. */
4564int
4565cpat_const_p (rtx x, enum machine_mode mode)
4566{
4567 if (CONSTANT_P (x))
4568 {
4569 enum immediate_class c = classify_immediate (x, mode);
4570 return c == IC_CPAT;
4571 }
4572 return 0;
4573}
644459d0 4574
dea01258 4575rtx
4576gen_cpat_const (rtx * ops)
4577{
4578 unsigned char dst[16];
4579 int i, offset, shift, isize;
4580 if (GET_CODE (ops[3]) != CONST_INT
4581 || GET_CODE (ops[2]) != CONST_INT
4582 || (GET_CODE (ops[1]) != CONST_INT
4583 && GET_CODE (ops[1]) != REG))
4584 return 0;
4585 if (GET_CODE (ops[1]) == REG
4586 && (!REG_POINTER (ops[1])
4587 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4588 return 0;
644459d0 4589
4590 for (i = 0; i < 16; i++)
dea01258 4591 dst[i] = i + 16;
4592 isize = INTVAL (ops[3]);
4593 if (isize == 1)
4594 shift = 3;
4595 else if (isize == 2)
4596 shift = 2;
4597 else
4598 shift = 0;
4599 offset = (INTVAL (ops[2]) +
4600 (GET_CODE (ops[1]) ==
4601 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4602 for (i = 0; i < isize; i++)
4603 dst[offset + i] = i + shift;
4604 return array_to_constant (TImode, dst);
644459d0 4605}
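/* Worked example of the loop above: a 4-byte pattern at offset 0 produces
   the control constant

     00 01 02 03 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f

   i.e. bytes 0-3 of the result come from the first shufb operand while the
   remaining bytes pass through from the second operand.  */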
4606
4607/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4608 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4609 than 16 bytes, the value is repeated across the rest of the array. */
4610void
4611constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4612{
4613 HOST_WIDE_INT val;
4614 int i, j, first;
4615
4616 memset (arr, 0, 16);
4617 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4618 if (GET_CODE (x) == CONST_INT
4619 || (GET_CODE (x) == CONST_DOUBLE
4620 && (mode == SFmode || mode == DFmode)))
4621 {
4622 gcc_assert (mode != VOIDmode && mode != BLKmode);
4623
4624 if (GET_CODE (x) == CONST_DOUBLE)
4625 val = const_double_to_hwint (x);
4626 else
4627 val = INTVAL (x);
4628 first = GET_MODE_SIZE (mode) - 1;
4629 for (i = first; i >= 0; i--)
4630 {
4631 arr[i] = val & 0xff;
4632 val >>= 8;
4633 }
4634 /* Splat the constant across the whole array. */
4635 for (j = 0, i = first + 1; i < 16; i++)
4636 {
4637 arr[i] = arr[j];
4638 j = (j == first) ? 0 : j + 1;
4639 }
4640 }
4641 else if (GET_CODE (x) == CONST_DOUBLE)
4642 {
4643 val = CONST_DOUBLE_LOW (x);
4644 for (i = 15; i >= 8; i--)
4645 {
4646 arr[i] = val & 0xff;
4647 val >>= 8;
4648 }
4649 val = CONST_DOUBLE_HIGH (x);
4650 for (i = 7; i >= 0; i--)
4651 {
4652 arr[i] = val & 0xff;
4653 val >>= 8;
4654 }
4655 }
4656 else if (GET_CODE (x) == CONST_VECTOR)
4657 {
4658 int units;
4659 rtx elt;
4660 mode = GET_MODE_INNER (mode);
4661 units = CONST_VECTOR_NUNITS (x);
4662 for (i = 0; i < units; i++)
4663 {
4664 elt = CONST_VECTOR_ELT (x, i);
4665 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4666 {
4667 if (GET_CODE (elt) == CONST_DOUBLE)
4668 val = const_double_to_hwint (elt);
4669 else
4670 val = INTVAL (elt);
4671 first = GET_MODE_SIZE (mode) - 1;
4672 if (first + i * GET_MODE_SIZE (mode) > 16)
4673 abort ();
4674 for (j = first; j >= 0; j--)
4675 {
4676 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4677 val >>= 8;
4678 }
4679 }
4680 }
4681 }
4682 else
4683 gcc_unreachable();
4684}
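/* Worked example: constant_to_array (SImode, GEN_INT (0x12345678), arr)
   stores the big-endian bytes 12 34 56 78 in arr[0..3] and then repeats that
   4-byte pattern across the remaining 12 bytes of the array.  */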
4685
4686/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4687 smaller than 16 bytes, use the bytes that would represent that value
4688 in a register, e.g., for QImode return the value of arr[3]. */
4689rtx
4690array_to_constant (enum machine_mode mode, unsigned char arr[16])
4691{
4692 enum machine_mode inner_mode;
4693 rtvec v;
4694 int units, size, i, j, k;
4695 HOST_WIDE_INT val;
4696
4697 if (GET_MODE_CLASS (mode) == MODE_INT
4698 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4699 {
4700 j = GET_MODE_SIZE (mode);
4701 i = j < 4 ? 4 - j : 0;
4702 for (val = 0; i < j; i++)
4703 val = (val << 8) | arr[i];
4704 val = trunc_int_for_mode (val, mode);
4705 return GEN_INT (val);
4706 }
4707
4708 if (mode == TImode)
4709 {
4710 HOST_WIDE_INT high;
4711 for (i = high = 0; i < 8; i++)
4712 high = (high << 8) | arr[i];
4713 for (i = 8, val = 0; i < 16; i++)
4714 val = (val << 8) | arr[i];
4715 return immed_double_const (val, high, TImode);
4716 }
4717 if (mode == SFmode)
4718 {
4719 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4720 val = trunc_int_for_mode (val, SImode);
171b6d22 4721 return hwint_to_const_double (SFmode, val);
644459d0 4722 }
4723 if (mode == DFmode)
4724 {
4725 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4726 val <<= 32;
4727 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4728 return hwint_to_const_double (DFmode, val);
644459d0 4729 }
4730
4731 if (!VECTOR_MODE_P (mode))
4732 abort ();
4733
4734 units = GET_MODE_NUNITS (mode);
4735 size = GET_MODE_UNIT_SIZE (mode);
4736 inner_mode = GET_MODE_INNER (mode);
4737 v = rtvec_alloc (units);
4738
4739 for (k = i = 0; i < units; ++i)
4740 {
4741 val = 0;
4742 for (j = 0; j < size; j++, k++)
4743 val = (val << 8) | arr[k];
4744
4745 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4746 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4747 else
4748 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4749 }
4750 if (k > 16)
4751 abort ();
4752
4753 return gen_rtx_CONST_VECTOR (mode, v);
4754}
4755
4756static void
4757reloc_diagnostic (rtx x)
4758{
4759 tree loc_decl, decl = 0;
4760 const char *msg;
4761 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4762 return;
4763
4764 if (GET_CODE (x) == SYMBOL_REF)
4765 decl = SYMBOL_REF_DECL (x);
4766 else if (GET_CODE (x) == CONST
4767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4768 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4769
4770 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4771 if (decl && !DECL_P (decl))
4772 decl = 0;
4773
4774 /* We use last_assemble_variable_decl to get line information. It's
4775 not always going to be right and might not even be close, but will
4776 be right for the more common cases. */
5df189be 4777 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4778 loc_decl = decl;
4779 else
4780 loc_decl = last_assemble_variable_decl;
4781
4782 /* The decl could be a string constant. */
4783 if (decl && DECL_P (decl))
4784 msg = "%Jcreating run-time relocation for %qD";
4785 else
4786 msg = "creating run-time relocation";
4787
99369027 4788 if (TARGET_WARN_RELOC)
644459d0 4789 warning (0, msg, loc_decl, decl);
99369027 4790 else
4791 error (msg, loc_decl, decl);
644459d0 4792}
4793
4794/* Hook into assemble_integer so we can generate an error for run-time
4795 relocations. The SPU ABI disallows them. */
4796static bool
4797spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4798{
4799 /* By default run-time relocations aren't supported, but we allow them
4800 in case users support it in their own run-time loader. And we provide
4801 a warning for those users that don't. */
4802 if ((GET_CODE (x) == SYMBOL_REF)
4803 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4804 reloc_diagnostic (x);
4805
4806 return default_assemble_integer (x, size, aligned_p);
4807}
4808
4809static void
4810spu_asm_globalize_label (FILE * file, const char *name)
4811{
4812 fputs ("\t.global\t", file);
4813 assemble_name (file, name);
4814 fputs ("\n", file);
4815}
4816
4817static bool
f529eb25 4818spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4819 bool speed ATTRIBUTE_UNUSED)
644459d0 4820{
4821 enum machine_mode mode = GET_MODE (x);
4822 int cost = COSTS_N_INSNS (2);
4823
4824 /* Folding to a CONST_VECTOR will use extra space but there might
4825 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4826 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4827 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4828 because this cost will only be compared against a single insn.
4829 if (code == CONST_VECTOR)
4830 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4831 */
4832
4833 /* Use defaults for float operations. Not accurate but good enough. */
4834 if (mode == DFmode)
4835 {
4836 *total = COSTS_N_INSNS (13);
4837 return true;
4838 }
4839 if (mode == SFmode)
4840 {
4841 *total = COSTS_N_INSNS (6);
4842 return true;
4843 }
4844 switch (code)
4845 {
4846 case CONST_INT:
4847 if (satisfies_constraint_K (x))
4848 *total = 0;
4849 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4850 *total = COSTS_N_INSNS (1);
4851 else
4852 *total = COSTS_N_INSNS (3);
4853 return true;
4854
4855 case CONST:
4856 *total = COSTS_N_INSNS (3);
4857 return true;
4858
4859 case LABEL_REF:
4860 case SYMBOL_REF:
4861 *total = COSTS_N_INSNS (0);
4862 return true;
4863
4864 case CONST_DOUBLE:
4865 *total = COSTS_N_INSNS (5);
4866 return true;
4867
4868 case FLOAT_EXTEND:
4869 case FLOAT_TRUNCATE:
4870 case FLOAT:
4871 case UNSIGNED_FLOAT:
4872 case FIX:
4873 case UNSIGNED_FIX:
4874 *total = COSTS_N_INSNS (7);
4875 return true;
4876
4877 case PLUS:
4878 if (mode == TImode)
4879 {
4880 *total = COSTS_N_INSNS (9);
4881 return true;
4882 }
4883 break;
4884
4885 case MULT:
4886 cost =
4887 GET_CODE (XEXP (x, 0)) ==
4888 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4889 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4890 {
4891 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4892 {
4893 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4894 cost = COSTS_N_INSNS (14);
4895 if ((val & 0xffff) == 0)
4896 cost = COSTS_N_INSNS (9);
4897 else if (val > 0 && val < 0x10000)
4898 cost = COSTS_N_INSNS (11);
4899 }
4900 }
4901 *total = cost;
4902 return true;
4903 case DIV:
4904 case UDIV:
4905 case MOD:
4906 case UMOD:
4907 *total = COSTS_N_INSNS (20);
4908 return true;
4909 case ROTATE:
4910 case ROTATERT:
4911 case ASHIFT:
4912 case ASHIFTRT:
4913 case LSHIFTRT:
4914 *total = COSTS_N_INSNS (4);
4915 return true;
4916 case UNSPEC:
4917 if (XINT (x, 1) == UNSPEC_CONVERT)
4918 *total = COSTS_N_INSNS (0);
4919 else
4920 *total = COSTS_N_INSNS (4);
4921 return true;
4922 }
4923 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4924 if (GET_MODE_CLASS (mode) == MODE_INT
4925 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4926 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4927 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4928 *total = cost;
4929 return true;
4930}
4931
1bd43494 4932static enum machine_mode
4933spu_unwind_word_mode (void)
644459d0 4934{
1bd43494 4935 return SImode;
644459d0 4936}
4937
4938/* Decide whether we can make a sibling call to a function. DECL is the
4939 declaration of the function being targeted by the call and EXP is the
4940 CALL_EXPR representing the call. */
4941static bool
4942spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4943{
4944 return decl && !TARGET_LARGE_MEM;
4945}
4946
4947/* We need to correctly update the back chain pointer and the Available
4948 Stack Size (which is in the second slot of the sp register.) */
4949void
4950spu_allocate_stack (rtx op0, rtx op1)
4951{
4952 HOST_WIDE_INT v;
4953 rtx chain = gen_reg_rtx (V4SImode);
4954 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4955 rtx sp = gen_reg_rtx (V4SImode);
4956 rtx splatted = gen_reg_rtx (V4SImode);
4957 rtx pat = gen_reg_rtx (TImode);
4958
4959 /* copy the back chain so we can save it back again. */
4960 emit_move_insn (chain, stack_bot);
4961
4962 op1 = force_reg (SImode, op1);
4963
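  /* Build a shufb pattern that copies the preferred-slot word of op1 into
     every word slot, i.e. splat the SImode stack adjustment across a V4SI
     value.  */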
4964 v = 0x1020300010203ll;
4965 emit_move_insn (pat, immed_double_const (v, v, TImode));
4966 emit_insn (gen_shufb (splatted, op1, op1, pat));
4967
4968 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4969 emit_insn (gen_subv4si3 (sp, sp, splatted));
4970
4971 if (flag_stack_check)
4972 {
4973 rtx avail = gen_reg_rtx(SImode);
4974 rtx result = gen_reg_rtx(SImode);
4975 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4976 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4977 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4978 }
4979
4980 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4981
4982 emit_move_insn (stack_bot, chain);
4983
4984 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4985}
4986
4987void
4988spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4989{
4990 static unsigned char arr[16] =
4991 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4992 rtx temp = gen_reg_rtx (SImode);
4993 rtx temp2 = gen_reg_rtx (SImode);
4994 rtx temp3 = gen_reg_rtx (V4SImode);
4995 rtx temp4 = gen_reg_rtx (V4SImode);
4996 rtx pat = gen_reg_rtx (TImode);
4997 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4998
4999 /* Restore the backchain from the first word, sp from the second. */
5000 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5001 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5002
5003 emit_move_insn (pat, array_to_constant (TImode, arr));
5004
5005 /* Compute Available Stack Size for sp */
5006 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5007 emit_insn (gen_shufb (temp3, temp, temp, pat));
5008
5009 /* Compute Available Stack Size for back chain */
5010 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5011 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5012 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5013
5014 emit_insn (gen_addv4si3 (sp, sp, temp3));
5015 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5016}
5017
5018static void
5019spu_init_libfuncs (void)
5020{
5021 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5022 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5023 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5024 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5025 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5026 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5027 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5028 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5029 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5030 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5031 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5032
5033 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5034 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5035
5036 set_optab_libfunc (smul_optab, TImode, "__multi3");
5037 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5038 set_optab_libfunc (smod_optab, TImode, "__modti3");
5039 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5040 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5041 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5042}
5043
5044/* Make a subreg, stripping any existing subreg. We could possibly just
5045 call simplify_subreg, but in this case we know what we want. */
5046rtx
5047spu_gen_subreg (enum machine_mode mode, rtx x)
5048{
5049 if (GET_CODE (x) == SUBREG)
5050 x = SUBREG_REG (x);
5051 if (GET_MODE (x) == mode)
5052 return x;
5053 return gen_rtx_SUBREG (mode, x, 0);
5054}
5055
5056static bool
fb80456a 5057spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5058{
5059 return (TYPE_MODE (type) == BLKmode
5060 && ((type) == 0
5061 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5062 || int_size_in_bytes (type) >
5063 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5064}
5065\f
5066/* Create the built-in types and functions */
5067
5068struct spu_builtin_description spu_builtins[] = {
5069#define DEF_BUILTIN(fcode, icode, name, type, params) \
5070 {fcode, icode, name, type, params, NULL_TREE},
5071#include "spu-builtins.def"
5072#undef DEF_BUILTIN
5073};
5074
5075static void
5076spu_init_builtins (void)
5077{
5078 struct spu_builtin_description *d;
5079 unsigned int i;
5080
5081 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5082 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5083 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5084 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5085 V4SF_type_node = build_vector_type (float_type_node, 4);
5086 V2DF_type_node = build_vector_type (double_type_node, 2);
5087
5088 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5089 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5090 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5091 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5092
c4ecce0c 5093 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5094
5095 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5096 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5097 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5098 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5101 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5102 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5103 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5104 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5105 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5106 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5107
5108 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5109 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5110 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5111 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5112 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5113 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5114 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5115 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5116
5117 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5118 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5119
5120 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5121
5122 spu_builtin_types[SPU_BTI_PTR] =
5123 build_pointer_type (build_qualified_type
5124 (void_type_node,
5125 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5126
5127 /* For each builtin we build a new prototype. The tree code will make
5128 sure nodes are shared. */
5129 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5130 {
5131 tree p;
5132       char name[64];	/* add_builtin_function will make a copy.  */
5133 int parm;
5134
5135 if (d->name == 0)
5136 continue;
5137
5dfbd18f 5138 /* Find last parm. */
644459d0 5139 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5140 ;
644459d0 5141
5142 p = void_list_node;
5143 while (parm > 1)
5144 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5145
5146 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5147
5148 sprintf (name, "__builtin_%s", d->name);
5149 d->fndecl =
5150 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5151 NULL, NULL_TREE);
a76866d3 5152 if (d->fcode == SPU_MASK_FOR_LOAD)
5153 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5154
5155 /* These builtins don't throw. */
5156 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5157 }
5158}
5159
cf31d486 5160void
5161spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5162{
5163 static unsigned char arr[16] =
5164 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5165
5166 rtx temp = gen_reg_rtx (Pmode);
5167 rtx temp2 = gen_reg_rtx (V4SImode);
5168 rtx temp3 = gen_reg_rtx (V4SImode);
5169 rtx pat = gen_reg_rtx (TImode);
5170 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5171
5172 emit_move_insn (pat, array_to_constant (TImode, arr));
5173
5174 /* Restore the sp. */
5175 emit_move_insn (temp, op1);
5176 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5177
5178 /* Compute available stack size for sp. */
5179 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5180 emit_insn (gen_shufb (temp3, temp, temp, pat));
5181
5182 emit_insn (gen_addv4si3 (sp, sp, temp3));
5183 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5184}
5185
644459d0 5186int
5187spu_safe_dma (HOST_WIDE_INT channel)
5188{
006e4b96 5189 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5190}
5191
5192void
5193spu_builtin_splats (rtx ops[])
5194{
5195 enum machine_mode mode = GET_MODE (ops[0]);
5196 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5197 {
5198 unsigned char arr[16];
5199 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5200 emit_move_insn (ops[0], array_to_constant (mode, arr));
5201 }
644459d0 5202 else
5203 {
5204 rtx reg = gen_reg_rtx (TImode);
5205 rtx shuf;
5206 if (GET_CODE (ops[1]) != REG
5207 && GET_CODE (ops[1]) != SUBREG)
5208 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5209 switch (mode)
5210 {
5211 case V2DImode:
5212 case V2DFmode:
5213 shuf =
5214 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5215 TImode);
5216 break;
5217 case V4SImode:
5218 case V4SFmode:
5219 shuf =
5220 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5221 TImode);
5222 break;
5223 case V8HImode:
5224 shuf =
5225 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5226 TImode);
5227 break;
5228 case V16QImode:
5229 shuf =
5230 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5231 TImode);
5232 break;
5233 default:
5234 abort ();
5235 }
5236 emit_move_insn (reg, shuf);
5237 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5238 }
5239}
5240
5241void
5242spu_builtin_extract (rtx ops[])
5243{
5244 enum machine_mode mode;
5245 rtx rot, from, tmp;
5246
5247 mode = GET_MODE (ops[1]);
5248
5249 if (GET_CODE (ops[2]) == CONST_INT)
5250 {
5251 switch (mode)
5252 {
5253 case V16QImode:
5254 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5255 break;
5256 case V8HImode:
5257 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5258 break;
5259 case V4SFmode:
5260 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5261 break;
5262 case V4SImode:
5263 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5264 break;
5265 case V2DImode:
5266 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5267 break;
5268 case V2DFmode:
5269 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5270 break;
5271 default:
5272 abort ();
5273 }
5274 return;
5275 }
5276
5277 from = spu_gen_subreg (TImode, ops[1]);
5278 rot = gen_reg_rtx (TImode);
5279 tmp = gen_reg_rtx (SImode);
5280
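  /* Compute in TMP the byte rotate count that brings element ops[2] into
     the preferred slot for its element mode (byte 3 for QImode elements,
     bytes 2-3 for HImode, bytes 0-3 for SImode/SFmode, bytes 0-7 for
     DImode/DFmode).  */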
5281 switch (mode)
5282 {
5283 case V16QImode:
5284 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5285 break;
5286 case V8HImode:
5287 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5288 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5289 break;
5290 case V4SFmode:
5291 case V4SImode:
5292 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5293 break;
5294 case V2DImode:
5295 case V2DFmode:
5296 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5297 break;
5298 default:
5299 abort ();
5300 }
5301 emit_insn (gen_rotqby_ti (rot, from, tmp));
5302
5303 emit_insn (gen_spu_convert (ops[0], rot));
5304}
5305
5306void
5307spu_builtin_insert (rtx ops[])
5308{
5309 enum machine_mode mode = GET_MODE (ops[0]);
5310 enum machine_mode imode = GET_MODE_INNER (mode);
5311 rtx mask = gen_reg_rtx (TImode);
5312 rtx offset;
5313
5314 if (GET_CODE (ops[3]) == CONST_INT)
5315 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5316 else
5317 {
5318 offset = gen_reg_rtx (SImode);
5319 emit_insn (gen_mulsi3
5320 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5321 }
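  /* The stack pointer serves as the cpat base register only because it is
     known to be 16-byte aligned, so OFFSET alone determines where the new
     element is inserted.  */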
5322 emit_insn (gen_cpat
5323 (mask, stack_pointer_rtx, offset,
5324 GEN_INT (GET_MODE_SIZE (imode))));
5325 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5326}
5327
5328void
5329spu_builtin_promote (rtx ops[])
5330{
5331 enum machine_mode mode, imode;
5332 rtx rot, from, offset;
5333 HOST_WIDE_INT pos;
5334
5335 mode = GET_MODE (ops[0]);
5336 imode = GET_MODE_INNER (mode);
5337
5338 from = gen_reg_rtx (TImode);
5339 rot = spu_gen_subreg (TImode, ops[0]);
5340
5341 emit_insn (gen_spu_convert (from, ops[1]));
5342
5343 if (GET_CODE (ops[2]) == CONST_INT)
5344 {
5345 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5346 if (GET_MODE_SIZE (imode) < 4)
5347 pos += 4 - GET_MODE_SIZE (imode);
5348 offset = GEN_INT (pos & 15);
5349 }
5350 else
5351 {
5352 offset = gen_reg_rtx (SImode);
5353 switch (mode)
5354 {
5355 case V16QImode:
5356 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5357 break;
5358 case V8HImode:
5359 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5360 emit_insn (gen_addsi3 (offset, offset, offset));
5361 break;
5362 case V4SFmode:
5363 case V4SImode:
5364 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5365 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5366 break;
5367 case V2DImode:
5368 case V2DFmode:
5369 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5370 break;
5371 default:
5372 abort ();
5373 }
5374 }
5375 emit_insn (gen_rotqby_ti (rot, from, offset));
5376}
5377
5378void
5379spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5380{
5381 rtx shuf = gen_reg_rtx (V4SImode);
5382 rtx insn = gen_reg_rtx (V4SImode);
5383 rtx shufc;
5384 rtx insnc;
5385 rtx mem;
5386
5387 fnaddr = force_reg (SImode, fnaddr);
5388 cxt = force_reg (SImode, cxt);
5389
5390 if (TARGET_LARGE_MEM)
5391 {
5392 rtx rotl = gen_reg_rtx (V4SImode);
5393 rtx mask = gen_reg_rtx (V4SImode);
5394 rtx bi = gen_reg_rtx (SImode);
5395 unsigned char shufa[16] = {
5396 2, 3, 0, 1, 18, 19, 16, 17,
5397 0, 1, 2, 3, 16, 17, 18, 19
5398 };
5399 unsigned char insna[16] = {
5400 0x41, 0, 0, 79,
5401 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5402 0x60, 0x80, 0, 79,
5403 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5404 };
5405
5406 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5407 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5408
5409 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5410 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5411 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5412 emit_insn (gen_selb (insn, insnc, rotl, mask));
5413
5414 mem = memory_address (Pmode, tramp);
5415 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5416
5417 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5418 mem = memory_address (Pmode, plus_constant (tramp, 16));
5419 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5420 }
5421 else
5422 {
5423 rtx scxt = gen_reg_rtx (SImode);
5424 rtx sfnaddr = gen_reg_rtx (SImode);
5425 unsigned char insna[16] = {
5426 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5427 0x30, 0, 0, 0,
5428 0, 0, 0, 0,
5429 0, 0, 0, 0
5430 };
5431
5432 shufc = gen_reg_rtx (TImode);
5433 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5434
5435 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5436 fits 18 bits and the last 4 are zeros. This will be true if
5437 the stack pointer is initialized to 0x3fff0 at program start,
5438 otherwise the ila instruction will be garbage. */
5439
5440 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5441 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5442 emit_insn (gen_cpat
5443 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5444 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5445 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5446
5447 mem = memory_address (Pmode, tramp);
5448 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5449
5450 }
5451 emit_insn (gen_sync ());
5452}
5453
5454void
5455spu_expand_sign_extend (rtx ops[])
5456{
5457 unsigned char arr[16];
5458 rtx pat = gen_reg_rtx (TImode);
5459 rtx sign, c;
5460 int i, last;
5461 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5462 if (GET_MODE (ops[1]) == QImode)
5463 {
5464 sign = gen_reg_rtx (HImode);
5465 emit_insn (gen_extendqihi2 (sign, ops[1]));
5466 for (i = 0; i < 16; i++)
5467 arr[i] = 0x12;
5468 arr[last] = 0x13;
5469 }
5470 else
5471 {
5472 for (i = 0; i < 16; i++)
5473 arr[i] = 0x10;
5474 switch (GET_MODE (ops[1]))
5475 {
5476 case HImode:
5477 sign = gen_reg_rtx (SImode);
5478 emit_insn (gen_extendhisi2 (sign, ops[1]));
5479 arr[last] = 0x03;
5480 arr[last - 1] = 0x02;
5481 break;
5482 case SImode:
5483 sign = gen_reg_rtx (SImode);
5484 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5485 for (i = 0; i < 4; i++)
5486 arr[last - i] = 3 - i;
5487 break;
5488 case DImode:
5489 sign = gen_reg_rtx (SImode);
5490 c = gen_reg_rtx (SImode);
5491 emit_insn (gen_spu_convert (c, ops[1]));
5492 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5493 for (i = 0; i < 8; i++)
5494 arr[last - i] = 7 - i;
5495 break;
5496 default:
5497 abort ();
5498 }
5499 }
5500 emit_move_insn (pat, array_to_constant (TImode, arr));
5501 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5502}
5503
5504/* expand vector initialization. If there are any constant parts,
5505 load constant parts first. Then load any non-constant parts. */
5506void
5507spu_expand_vector_init (rtx target, rtx vals)
5508{
5509 enum machine_mode mode = GET_MODE (target);
5510 int n_elts = GET_MODE_NUNITS (mode);
5511 int n_var = 0;
5512 bool all_same = true;
790c536c 5513 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5514 int i;
5515
5516 first = XVECEXP (vals, 0, 0);
5517 for (i = 0; i < n_elts; ++i)
5518 {
5519 x = XVECEXP (vals, 0, i);
e442af0b 5520 if (!(CONST_INT_P (x)
5521 || GET_CODE (x) == CONST_DOUBLE
5522 || GET_CODE (x) == CONST_FIXED))
644459d0 5523 ++n_var;
5524 else
5525 {
5526 if (first_constant == NULL_RTX)
5527 first_constant = x;
5528 }
5529 if (i > 0 && !rtx_equal_p (x, first))
5530 all_same = false;
5531 }
5532
5533 /* if all elements are the same, use splats to repeat elements */
5534 if (all_same)
5535 {
5536 if (!CONSTANT_P (first)
5537 && !register_operand (first, GET_MODE (x)))
5538 first = force_reg (GET_MODE (first), first);
5539 emit_insn (gen_spu_splats (target, first));
5540 return;
5541 }
5542
5543 /* load constant parts */
5544 if (n_var != n_elts)
5545 {
5546 if (n_var == 0)
5547 {
5548 emit_move_insn (target,
5549 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5550 }
5551 else
5552 {
5553 rtx constant_parts_rtx = copy_rtx (vals);
5554
5555 gcc_assert (first_constant != NULL_RTX);
5556	  /* Fill empty slots with the first constant; this increases
5557 our chance of using splats in the recursive call below. */
5558 for (i = 0; i < n_elts; ++i)
e442af0b 5559 {
5560 x = XVECEXP (constant_parts_rtx, 0, i);
5561 if (!(CONST_INT_P (x)
5562 || GET_CODE (x) == CONST_DOUBLE
5563 || GET_CODE (x) == CONST_FIXED))
5564 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5565 }
644459d0 5566
5567 spu_expand_vector_init (target, constant_parts_rtx);
5568 }
5569 }
5570
5571 /* load variable parts */
5572 if (n_var != 0)
5573 {
5574 rtx insert_operands[4];
5575
5576 insert_operands[0] = target;
5577 insert_operands[2] = target;
5578 for (i = 0; i < n_elts; ++i)
5579 {
5580 x = XVECEXP (vals, 0, i);
e442af0b 5581 if (!(CONST_INT_P (x)
5582 || GET_CODE (x) == CONST_DOUBLE
5583 || GET_CODE (x) == CONST_FIXED))
644459d0 5584 {
5585 if (!register_operand (x, GET_MODE (x)))
5586 x = force_reg (GET_MODE (x), x);
5587 insert_operands[1] = x;
5588 insert_operands[3] = GEN_INT (i);
5589 spu_builtin_insert (insert_operands);
5590 }
5591 }
5592 }
5593}
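/* Illustrative sketch, not part of the original file: the same strategy
   applied to a plain 4-element array.  Constant slots are laid down first
   (with variable slots padded by the first constant, mirroring the recursive
   call above), then the variable elements are inserted one at a time.  The
   names are hypothetical and exist only for this example.  */

static void
example_vector_init_strategy (int dst[4], const int vals[4],
                              const unsigned char is_const[4])
{
  int i, first_constant = 0, have_constant = 0;

  /* Remember the first constant element, if any.  */
  for (i = 0; i < 4; i++)
    if (is_const[i])
      {
        first_constant = vals[i];
        have_constant = 1;
        break;
      }

  /* Load the constant parts, padding variable slots with the first
     constant so the constant part can become a splat.  */
  for (i = 0; i < 4; i++)
    dst[i] = is_const[i] ? vals[i] : (have_constant ? first_constant : 0);

  /* Insert the variable parts.  */
  for (i = 0; i < 4; i++)
    if (!is_const[i])
      dst[i] = vals[i];
}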
6352eedf 5594
5474166e 5595/* Return the insn index of the vector compare instruction for the given
 5596   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
5597
5598static int
5599get_vec_cmp_insn (enum rtx_code code,
5600 enum machine_mode dest_mode,
5601 enum machine_mode op_mode)
5602
5603{
5604 switch (code)
5605 {
5606 case EQ:
5607 if (dest_mode == V16QImode && op_mode == V16QImode)
5608 return CODE_FOR_ceq_v16qi;
5609 if (dest_mode == V8HImode && op_mode == V8HImode)
5610 return CODE_FOR_ceq_v8hi;
5611 if (dest_mode == V4SImode && op_mode == V4SImode)
5612 return CODE_FOR_ceq_v4si;
5613 if (dest_mode == V4SImode && op_mode == V4SFmode)
5614 return CODE_FOR_ceq_v4sf;
5615 if (dest_mode == V2DImode && op_mode == V2DFmode)
5616 return CODE_FOR_ceq_v2df;
5617 break;
5618 case GT:
5619 if (dest_mode == V16QImode && op_mode == V16QImode)
5620 return CODE_FOR_cgt_v16qi;
5621 if (dest_mode == V8HImode && op_mode == V8HImode)
5622 return CODE_FOR_cgt_v8hi;
5623 if (dest_mode == V4SImode && op_mode == V4SImode)
5624 return CODE_FOR_cgt_v4si;
5625 if (dest_mode == V4SImode && op_mode == V4SFmode)
5626 return CODE_FOR_cgt_v4sf;
5627 if (dest_mode == V2DImode && op_mode == V2DFmode)
5628 return CODE_FOR_cgt_v2df;
5629 break;
5630 case GTU:
5631 if (dest_mode == V16QImode && op_mode == V16QImode)
5632 return CODE_FOR_clgt_v16qi;
5633 if (dest_mode == V8HImode && op_mode == V8HImode)
5634 return CODE_FOR_clgt_v8hi;
5635 if (dest_mode == V4SImode && op_mode == V4SImode)
5636 return CODE_FOR_clgt_v4si;
5637 break;
5638 default:
5639 break;
5640 }
5641 return -1;
5642}
5643
 5644/* Emit a vector compare for operands OP0 and OP1 using code RCODE.
 5645   DMODE is the expected destination mode.  This is a recursive function.  */
5646
5647static rtx
5648spu_emit_vector_compare (enum rtx_code rcode,
5649 rtx op0, rtx op1,
5650 enum machine_mode dmode)
5651{
5652 int vec_cmp_insn;
5653 rtx mask;
5654 enum machine_mode dest_mode;
5655 enum machine_mode op_mode = GET_MODE (op1);
5656
5657 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5658
 5659 /* Floating point vector compare instructions use destination mode V4SImode.
 5660 Double precision floating point vector compare instructions use destination
 5661 mode V2DImode.  Move the destination to the appropriate mode later.  */
5662 if (dmode == V4SFmode)
5663 dest_mode = V4SImode;
5664 else if (dmode == V2DFmode)
5665 dest_mode = V2DImode;
5666 else
5667 dest_mode = dmode;
5668
5669 mask = gen_reg_rtx (dest_mode);
5670 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5671
5672 if (vec_cmp_insn == -1)
5673 {
5674 bool swap_operands = false;
5675 bool try_again = false;
5676 switch (rcode)
5677 {
5678 case LT:
5679 rcode = GT;
5680 swap_operands = true;
5681 try_again = true;
5682 break;
5683 case LTU:
5684 rcode = GTU;
5685 swap_operands = true;
5686 try_again = true;
5687 break;
5688 case NE:
5689 /* Treat A != B as ~(A==B). */
5690 {
5691 enum insn_code nor_code;
5692 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5693 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5694 gcc_assert (nor_code != CODE_FOR_nothing);
5695 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5696 if (dmode != dest_mode)
5697 {
5698 rtx temp = gen_reg_rtx (dest_mode);
5699 convert_move (temp, mask, 0);
5700 return temp;
5701 }
5702 return mask;
5703 }
5704 break;
5705 case GE:
5706 case GEU:
5707 case LE:
5708 case LEU:
5709 /* Try GT/GTU/LT/LTU OR EQ */
5710 {
5711 rtx c_rtx, eq_rtx;
5712 enum insn_code ior_code;
5713 enum rtx_code new_code;
5714
5715 switch (rcode)
5716 {
5717 case GE: new_code = GT; break;
5718 case GEU: new_code = GTU; break;
5719 case LE: new_code = LT; break;
5720 case LEU: new_code = LTU; break;
5721 default:
5722 gcc_unreachable ();
5723 }
5724
5725 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5726 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5727
99bdde56 5728 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5729 gcc_assert (ior_code != CODE_FOR_nothing);
5730 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5731 if (dmode != dest_mode)
5732 {
5733 rtx temp = gen_reg_rtx (dest_mode);
5734 convert_move (temp, mask, 0);
5735 return temp;
5736 }
5737 return mask;
5738 }
5739 break;
5740 default:
5741 gcc_unreachable ();
5742 }
5743
5744 /* You only get two chances. */
5745 if (try_again)
5746 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5747
5748 gcc_assert (vec_cmp_insn != -1);
5749
5750 if (swap_operands)
5751 {
5752 rtx tmp;
5753 tmp = op0;
5754 op0 = op1;
5755 op1 = tmp;
5756 }
5757 }
5758
5759 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5760 if (dmode != dest_mode)
5761 {
5762 rtx temp = gen_reg_rtx (dest_mode);
5763 convert_move (temp, mask, 0);
5764 return temp;
5765 }
5766 return mask;
5767}
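/* Illustrative sketch, not part of the original file: the identities used
   above to synthesize comparisons the hardware lacks, shown on a single
   32-bit element mask.  GE/GEU/LE/LEU are built as a GT/GTU/LT/LTU result
   ORed with EQ, NE is built as NOT EQ, and LT/LTU become GT/GTU with the
   operands swapped.  The function names are hypothetical.  */

static unsigned int
example_mask_ge (int a, int b)
{
  unsigned int gt = a > b ? 0xffffffffu : 0u;
  unsigned int eq = a == b ? 0xffffffffu : 0u;
  return gt | eq;   /* a >= b expressed as GT OR EQ.  */
}

static unsigned int
example_mask_ne (int a, int b)
{
  unsigned int eq = a == b ? 0xffffffffu : 0u;
  return ~eq;       /* a != b expressed as NOT EQ.  */
}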
5768
5769
 5770/* Emit a vector conditional expression.
 5771   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
 5772   CC_OP0 and CC_OP1 are the two operands of the relational operation COND.  */
5773
5774int
5775spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5776 rtx cond, rtx cc_op0, rtx cc_op1)
5777{
5778 enum machine_mode dest_mode = GET_MODE (dest);
5779 enum rtx_code rcode = GET_CODE (cond);
5780 rtx mask;
5781
 5782 /* Get the vector mask for the given relational operation.  */
5783 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5784
 5785 emit_insn (gen_selb (dest, op2, op1, mask));
5786
5787 return 1;
5788}
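/* Illustrative sketch, not part of the original file: the per-element effect
   of the compare + selb sequence above.  The mask is all ones where the
   condition holds, so OP1 is selected there and OP2 elsewhere, matching the
   VEC_COND_EXPR semantics.  The function name is hypothetical.  */

static unsigned int
example_vec_cond_element (unsigned int op1, unsigned int op2,
                          unsigned int mask)
{
  return (mask & op1) | (~mask & op2);
}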
5789
6352eedf 5790static rtx
5791spu_force_reg (enum machine_mode mode, rtx op)
5792{
5793 rtx x, r;
5794 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5795 {
5796 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5797 || GET_MODE (op) == BLKmode)
5798 return force_reg (mode, convert_to_mode (mode, op, 0));
5799 abort ();
5800 }
5801
5802 r = force_reg (GET_MODE (op), op);
5803 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5804 {
5805 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5806 if (x)
5807 return x;
5808 }
5809
5810 x = gen_reg_rtx (mode);
5811 emit_insn (gen_spu_convert (x, r));
5812 return x;
5813}
5814
5815static void
5816spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5817{
5818 HOST_WIDE_INT v = 0;
5819 int lsbits;
5820 /* Check the range of immediate operands. */
5821 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5822 {
5823 int range = p - SPU_BTI_7;
5df189be 5824
5825 if (!CONSTANT_P (op))
6352eedf 5826 error ("%s expects an integer literal in the range [%d, %d].",
5827 d->name,
5828 spu_builtin_range[range].low, spu_builtin_range[range].high);
5829
5830 if (GET_CODE (op) == CONST
5831 && (GET_CODE (XEXP (op, 0)) == PLUS
5832 || GET_CODE (XEXP (op, 0)) == MINUS))
5833 {
5834 v = INTVAL (XEXP (XEXP (op, 0), 1));
5835 op = XEXP (XEXP (op, 0), 0);
5836 }
5837 else if (GET_CODE (op) == CONST_INT)
5838 v = INTVAL (op);
5df189be 5839 else if (GET_CODE (op) == CONST_VECTOR
5840 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5841 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5842
 5843 /* The default for v is 0, which is valid in every range.  */
5844 if (v < spu_builtin_range[range].low
5845 || v > spu_builtin_range[range].high)
5846 error ("%s expects an integer literal in the range [%d, %d]. ("
5847 HOST_WIDE_INT_PRINT_DEC ")",
5848 d->name,
5849 spu_builtin_range[range].low, spu_builtin_range[range].high,
5850 v);
6352eedf 5851
5852 switch (p)
5853 {
5854 case SPU_BTI_S10_4:
5855 lsbits = 4;
5856 break;
5857 case SPU_BTI_U16_2:
 5858 /* This is only used in lqa and stqa.  Even though the insns
 5859 encode 16 bits of the address (all but the 2 least
 5860 significant), only 14 bits are used because the address is
 5861 masked to be 16-byte aligned.  */
5862 lsbits = 4;
5863 break;
5864 case SPU_BTI_S16_2:
5865 /* This is used for lqr and stqr. */
5866 lsbits = 2;
5867 break;
5868 default:
5869 lsbits = 0;
5870 }
5871
5872 if (GET_CODE (op) == LABEL_REF
5873 || (GET_CODE (op) == SYMBOL_REF
5874 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5875 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5876 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5877 d->name);
5878 }
5879}
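/* Illustrative sketch, not part of the original file: the shape of the
   checks above on a plain integer immediate.  LOW and HIGH bound the
   literal, and the LSBITS least significant bits must be zero, otherwise
   they are silently dropped by the instruction encoding.  The name and
   return convention are hypothetical.  */

static int
example_check_immediate (long long v, long long low, long long high,
                         int lsbits)
{
  if (v < low || v > high)
    return 0;    /* Out of range: diagnosed as an error.  */
  if (lsbits && (v & ((1 << lsbits) - 1)) != 0)
    return -1;   /* Low bits ignored: diagnosed as a warning.  */
  return 1;      /* Acceptable as-is.  */
}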
5880
5881
5882static void
5df189be 5883expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5884 rtx target, rtx ops[])
5885{
5886 enum insn_code icode = d->icode;
5df189be 5887 int i = 0, a;
6352eedf 5888
5889 /* Expand the arguments into rtl. */
5890
5891 if (d->parm[0] != SPU_BTI_VOID)
5892 ops[i++] = target;
5893
5df189be 5894 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5895 {
5df189be 5896 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5897 if (arg == 0)
5898 abort ();
5899 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5900 }
5901}
5902
5903static rtx
5904spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5905 tree exp, rtx target)
6352eedf 5906{
5907 rtx pat;
5908 rtx ops[8];
5909 enum insn_code icode = d->icode;
5910 enum machine_mode mode, tmode;
5911 int i, p;
5912 tree return_type;
5913
5914 /* Set up ops[] with values from arglist. */
5df189be 5915 expand_builtin_args (d, exp, target, ops);
6352eedf 5916
5917 /* Handle the target operand which must be operand 0. */
5918 i = 0;
5919 if (d->parm[0] != SPU_BTI_VOID)
5920 {
5921
 5922      /* We prefer the mode specified for the match_operand; otherwise
 5923         use the mode from the builtin function prototype.  */
5924 tmode = insn_data[d->icode].operand[0].mode;
5925 if (tmode == VOIDmode)
5926 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5927
 5928      /* Try to use TARGET, because not using it can lead to extra copies,
 5929         and when all of the registers are in use those extra copies lead
 5930         to extra spills.  */
5931 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5932 ops[0] = target;
5933 else
5934 target = ops[0] = gen_reg_rtx (tmode);
5935
5936 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5937 abort ();
5938
5939 i++;
5940 }
5941
a76866d3 5942 if (d->fcode == SPU_MASK_FOR_LOAD)
5943 {
5944 enum machine_mode mode = insn_data[icode].operand[1].mode;
5945 tree arg;
5946 rtx addr, op, pat;
5947
5948 /* get addr */
5df189be 5949 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5950 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5951 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5952 addr = memory_address (mode, op);
5953
5954 /* negate addr */
5955 op = gen_reg_rtx (GET_MODE (addr));
5956 emit_insn (gen_rtx_SET (VOIDmode, op,
5957 gen_rtx_NEG (GET_MODE (addr), addr)));
5958 op = gen_rtx_MEM (mode, op);
5959
5960 pat = GEN_FCN (icode) (target, op);
5961 if (!pat)
5962 return 0;
5963 emit_insn (pat);
5964 return target;
5965 }
5966
6352eedf 5967  /* Ignore align_hint, but still expand its args in case they have
 5968     side effects.  */
5969 if (icode == CODE_FOR_spu_align_hint)
5970 return 0;
5971
5972 /* Handle the rest of the operands. */
5973 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5974 {
5975 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5976 mode = insn_data[d->icode].operand[i].mode;
5977 else
5978 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5979
 5980      /* MODE can be VOIDmode here for labels.  */
5981
5982 /* For specific intrinsics with an immediate operand, e.g.,
5983 si_ai(), we sometimes need to convert the scalar argument to a
5984 vector argument by splatting the scalar. */
5985 if (VECTOR_MODE_P (mode)
5986 && (GET_CODE (ops[i]) == CONST_INT
5987 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5988 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5989 {
5990 if (GET_CODE (ops[i]) == CONST_INT)
5991 ops[i] = spu_const (mode, INTVAL (ops[i]));
5992 else
5993 {
5994 rtx reg = gen_reg_rtx (mode);
5995 enum machine_mode imode = GET_MODE_INNER (mode);
5996 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5997 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5998 if (imode != GET_MODE (ops[i]))
5999 ops[i] = convert_to_mode (imode, ops[i],
6000 TYPE_UNSIGNED (spu_builtin_types
6001 [d->parm[i]]));
6002 emit_insn (gen_spu_splats (reg, ops[i]));
6003 ops[i] = reg;
6004 }
6005 }
6006
5df189be 6007 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6008
6352eedf 6009 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6010 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6011 }
6012
6013 switch (insn_data[icode].n_operands)
6014 {
6015 case 0:
6016 pat = GEN_FCN (icode) (0);
6017 break;
6018 case 1:
6019 pat = GEN_FCN (icode) (ops[0]);
6020 break;
6021 case 2:
6022 pat = GEN_FCN (icode) (ops[0], ops[1]);
6023 break;
6024 case 3:
6025 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6026 break;
6027 case 4:
6028 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6029 break;
6030 case 5:
6031 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6032 break;
6033 case 6:
6034 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6035 break;
6036 default:
6037 abort ();
6038 }
6039
6040 if (!pat)
6041 abort ();
6042
6043 if (d->type == B_CALL || d->type == B_BISLED)
6044 emit_call_insn (pat);
6045 else if (d->type == B_JUMP)
6046 {
6047 emit_jump_insn (pat);
6048 emit_barrier ();
6049 }
6050 else
6051 emit_insn (pat);
6052
6053 return_type = spu_builtin_types[d->parm[0]];
6054 if (d->parm[0] != SPU_BTI_VOID
6055 && GET_MODE (target) != TYPE_MODE (return_type))
6056 {
 6057      /* TARGET is the return value.  It should always have the mode of
 6058         the builtin function prototype.  */
6059 target = spu_force_reg (TYPE_MODE (return_type), target);
6060 }
6061
6062 return target;
6063}
6064
6065rtx
6066spu_expand_builtin (tree exp,
6067 rtx target,
6068 rtx subtarget ATTRIBUTE_UNUSED,
6069 enum machine_mode mode ATTRIBUTE_UNUSED,
6070 int ignore ATTRIBUTE_UNUSED)
6071{
5df189be 6072 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6073 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6074 struct spu_builtin_description *d;
6075
6076 if (fcode < NUM_SPU_BUILTINS)
6077 {
6078 d = &spu_builtins[fcode];
6079
5df189be 6080 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6081 }
6082 abort ();
6083}
6084
e99f512d 6085/* Implement targetm.vectorize.builtin_mul_widen_even. */
6086static tree
6087spu_builtin_mul_widen_even (tree type)
6088{
e99f512d 6089 switch (TYPE_MODE (type))
6090 {
6091 case V8HImode:
6092 if (TYPE_UNSIGNED (type))
6093 return spu_builtins[SPU_MULE_0].fndecl;
6094 else
6095 return spu_builtins[SPU_MULE_1].fndecl;
6096 break;
6097 default:
6098 return NULL_TREE;
6099 }
6100}
6101
6102/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6103static tree
6104spu_builtin_mul_widen_odd (tree type)
6105{
6106 switch (TYPE_MODE (type))
6107 {
6108 case V8HImode:
6109 if (TYPE_UNSIGNED (type))
6110 return spu_builtins[SPU_MULO_1].fndecl;
6111 else
6112 return spu_builtins[SPU_MULO_0].fndecl;
6113 break;
6114 default:
6115 return NULL_TREE;
6116 }
6117}
6118
a76866d3 6119/* Implement targetm.vectorize.builtin_mask_for_load. */
6120static tree
6121spu_builtin_mask_for_load (void)
6122{
6123 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6124 gcc_assert (d);
6125 return d->fndecl;
6126}
5df189be 6127
a28df51d 6128/* Implement targetm.vectorize.builtin_vectorization_cost. */
6129static int
6130spu_builtin_vectorization_cost (bool runtime_test)
6131{
 6132  /* If the branch of the runtime test is taken - i.e. the vectorized
6133 version is skipped - this incurs a misprediction cost (because the
6134 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6135 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6136 when the vectorized version is executed. */
6137 if (runtime_test)
6138 return -19;
6139 else
6140 return 0;
6141}
6142
0e87db76 6143/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6144   after applying N iterations.  This routine does not determine
 6145   how many iterations are required to reach the desired alignment.  */
6146
6147static bool
a9f1838b 6148spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6149{
6150 if (is_packed)
6151 return false;
6152
6153 /* All other types are naturally aligned. */
6154 return true;
6155}
6156
a0515226 6157/* Implement targetm.vectorize.builtin_vec_perm. */
6158tree
6159spu_builtin_vec_perm (tree type, tree *mask_element_type)
6160{
6161 struct spu_builtin_description *d;
6162
6163 *mask_element_type = unsigned_char_type_node;
6164
6165 switch (TYPE_MODE (type))
6166 {
6167 case V16QImode:
6168 if (TYPE_UNSIGNED (type))
6169 d = &spu_builtins[SPU_SHUFFLE_0];
6170 else
6171 d = &spu_builtins[SPU_SHUFFLE_1];
6172 break;
6173
6174 case V8HImode:
6175 if (TYPE_UNSIGNED (type))
6176 d = &spu_builtins[SPU_SHUFFLE_2];
6177 else
6178 d = &spu_builtins[SPU_SHUFFLE_3];
6179 break;
6180
6181 case V4SImode:
6182 if (TYPE_UNSIGNED (type))
6183 d = &spu_builtins[SPU_SHUFFLE_4];
6184 else
6185 d = &spu_builtins[SPU_SHUFFLE_5];
6186 break;
6187
6188 case V2DImode:
6189 if (TYPE_UNSIGNED (type))
6190 d = &spu_builtins[SPU_SHUFFLE_6];
6191 else
6192 d = &spu_builtins[SPU_SHUFFLE_7];
6193 break;
6194
6195 case V4SFmode:
6196 d = &spu_builtins[SPU_SHUFFLE_8];
6197 break;
6198
6199 case V2DFmode:
6200 d = &spu_builtins[SPU_SHUFFLE_9];
6201 break;
6202
6203 default:
6204 return NULL_TREE;
6205 }
6206
6207 gcc_assert (d);
6208 return d->fndecl;
6209}
6210
d52fd16a 6211/* Count the total number of instructions in each pipe and return the
6212 maximum, which is used as the Minimum Iteration Interval (MII)
 6213   in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1;
 6214   -2 marks instructions that can go in either pipe0 or pipe1.  */
6215static int
6216spu_sms_res_mii (struct ddg *g)
6217{
6218 int i;
6219 unsigned t[4] = {0, 0, 0, 0};
6220
6221 for (i = 0; i < g->num_nodes; i++)
6222 {
6223 rtx insn = g->nodes[i].insn;
6224 int p = get_pipe (insn) + 2;
6225
6226 assert (p >= 0);
6227 assert (p < 4);
6228
6229 t[p]++;
6230 if (dump_file && INSN_P (insn))
6231 fprintf (dump_file, "i%d %s %d %d\n",
6232 INSN_UID (insn),
6233 insn_data[INSN_CODE(insn)].name,
6234 p, t[p]);
6235 }
6236 if (dump_file)
6237 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6238
6239 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6240}
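/* Illustrative worked example with hypothetical counts: 3 either-pipe
   instructions (t[0]), 4 pipe0 instructions (t[2]) and 2 pipe1 instructions
   (t[3]) give
     MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5
   as the resource-based minimum iteration interval.  */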
6241
6242
5df189be 6243void
6244spu_init_expanders (void)
6245{
 6246 /* The hard frame pointer register is only 128-bit aligned when
 6247  * frame_pointer_needed is true.  We don't know that until we're
 6248  * expanding the prologue.  */
6249 if (cfun)
6250 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6251}
6252
6253static enum machine_mode
6254spu_libgcc_cmp_return_mode (void)
6255{
6256
 6257/* For the SPU, word mode is TImode, so it is better to use SImode
 6258   for compare returns.  */
6259 return SImode;
6260}
6261
6262static enum machine_mode
6263spu_libgcc_shift_count_mode (void)
6264{
 6265/* For the SPU, word mode is TImode, so it is better to use SImode
 6266   for shift counts.  */
6267 return SImode;
6268}
5a976006 6269
6270/* An early place to adjust some flags after GCC has finished processing
6271 * them. */
6272static void
6273asm_file_start (void)
6274{
6275 /* Variable tracking should be run after all optimizations which
6276 change order of insns. It also needs a valid CFG. */
6277 spu_flag_var_tracking = flag_var_tracking;
6278 flag_var_tracking = 0;
6279
6280 default_file_start ();
6281}
6282