75a70cf9 1/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
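/* Note: each entry gives the inclusive [low, high] range accepted for the
   corresponding builtin immediate operand type; e.g. SPU_BTI_U7 accepts
   0 .. 0x7f.  The _2 and _4 suffixes presumably mark operands that are
   scaled (shifted left by 2 or 4 bits) before use, hence their wider
   ranges.  */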
78
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
5474166e 98static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
644459d0 100static rtx get_branch_target (rtx branch);
101static void insert_branch_hints (void);
102static void insert_nops (void);
103static void spu_machine_dependent_reorg (void);
104static int spu_sched_issue_rate (void);
105static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107static int get_pipe (rtx insn);
108static int spu_sched_adjust_priority (rtx insn, int pri);
109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static int spu_naked_function_p (tree func);
fb80456a 117static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
644459d0 119static tree spu_build_builtin_va_list (void);
8a58ed0a 120static void spu_va_start (tree, rtx);
75a70cf9 121static tree spu_gimplify_va_arg_expr (tree valist, tree type,
122 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 123static int regno_aligned_for_load (int regno);
124static int store_with_one_insn_p (rtx mem);
644459d0 125static int mem_is_padded_component_ref (rtx x);
126static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127static void spu_asm_globalize_label (FILE * file, const char *name);
128static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
129 int *total);
130static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131static void spu_init_libfuncs (void);
fb80456a 132static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 133static void fix_range (const char *);
69ced2d6 134static void spu_encode_section_info (tree, rtx, int);
e99f512d 135static tree spu_builtin_mul_widen_even (tree);
136static tree spu_builtin_mul_widen_odd (tree);
a76866d3 137static tree spu_builtin_mask_for_load (void);
a28df51d 138static int spu_builtin_vectorization_cost (bool);
a9f1838b 139static bool spu_vector_alignment_reachable (const_tree, bool);
d52fd16a 140static int spu_sms_res_mii (struct ddg *g);
644459d0 141
142extern const char *reg_names[];
143rtx spu_compare_op0, spu_compare_op1;
144
5474166e 145/* Which instruction set architecture to use. */
146int spu_arch;
147/* Which cpu are we tuning for. */
148int spu_tune;
149
644459d0 150enum spu_immediate {
151 SPU_NONE,
152 SPU_IL,
153 SPU_ILA,
154 SPU_ILH,
155 SPU_ILHU,
156 SPU_ORI,
157 SPU_ORHI,
158 SPU_ORBI,
99369027 159 SPU_IOHL
644459d0 160};
dea01258 161enum immediate_class
162{
163 IC_POOL, /* constant pool */
164 IC_IL1, /* one il* instruction */
165 IC_IL2, /* both ilhu and iohl instructions */
166 IC_IL1s, /* one il* instruction */
167 IC_IL2s, /* both ilhu and iohl instructions */
168 IC_FSMBI, /* the fsmbi instruction */
169 IC_CPAT, /* one of the c*d instructions */
5df189be 170 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 171};
644459d0 172
173static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
174static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 175static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
176static enum immediate_class classify_immediate (rtx op,
177 enum machine_mode mode);
644459d0 178
1bd43494 179static enum machine_mode spu_unwind_word_mode (void);
180
ea32e033 181static enum machine_mode
182spu_libgcc_cmp_return_mode (void);
183
184static enum machine_mode
185spu_libgcc_shift_count_mode (void);
186
644459d0 187/* Built in types. */
188tree spu_builtin_types[SPU_BTI_MAX];
189\f
190/* TARGET overrides. */
191
192#undef TARGET_INIT_BUILTINS
193#define TARGET_INIT_BUILTINS spu_init_builtins
194
644459d0 195#undef TARGET_EXPAND_BUILTIN
196#define TARGET_EXPAND_BUILTIN spu_expand_builtin
197
1bd43494 198#undef TARGET_UNWIND_WORD_MODE
199#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 200
201/* The .8byte directive doesn't seem to work well for a 32-bit
202 architecture. */
203#undef TARGET_ASM_UNALIGNED_DI_OP
204#define TARGET_ASM_UNALIGNED_DI_OP NULL
205
206#undef TARGET_RTX_COSTS
207#define TARGET_RTX_COSTS spu_rtx_costs
208
209#undef TARGET_ADDRESS_COST
210#define TARGET_ADDRESS_COST hook_int_rtx_0
211
212#undef TARGET_SCHED_ISSUE_RATE
213#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
214
215#undef TARGET_SCHED_VARIABLE_ISSUE
216#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
217
218#undef TARGET_SCHED_ADJUST_PRIORITY
219#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
220
221#undef TARGET_SCHED_ADJUST_COST
222#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
223
224const struct attribute_spec spu_attribute_table[];
225#undef TARGET_ATTRIBUTE_TABLE
226#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
227
228#undef TARGET_ASM_INTEGER
229#define TARGET_ASM_INTEGER spu_assemble_integer
230
231#undef TARGET_SCALAR_MODE_SUPPORTED_P
232#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
233
234#undef TARGET_VECTOR_MODE_SUPPORTED_P
235#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
236
237#undef TARGET_FUNCTION_OK_FOR_SIBCALL
238#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
239
240#undef TARGET_ASM_GLOBALIZE_LABEL
241#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
242
243#undef TARGET_PASS_BY_REFERENCE
244#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
245
246#undef TARGET_MUST_PASS_IN_STACK
247#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
248
249#undef TARGET_BUILD_BUILTIN_VA_LIST
250#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
251
8a58ed0a 252#undef TARGET_EXPAND_BUILTIN_VA_START
253#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
254
644459d0 255#undef TARGET_SETUP_INCOMING_VARARGS
256#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
257
258#undef TARGET_MACHINE_DEPENDENT_REORG
259#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
260
261#undef TARGET_GIMPLIFY_VA_ARG_EXPR
262#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
263
264#undef TARGET_DEFAULT_TARGET_FLAGS
265#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
266
267#undef TARGET_INIT_LIBFUNCS
268#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
269
270#undef TARGET_RETURN_IN_MEMORY
271#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
272
69ced2d6 273#undef TARGET_ENCODE_SECTION_INFO
274#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
275
e99f512d 276#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
277#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
278
279#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
280#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
281
a76866d3 282#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
283#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
284
a28df51d 285#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
286#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
287
0e87db76 288#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
289#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
290
ea32e033 291#undef TARGET_LIBGCC_CMP_RETURN_MODE
292#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
293
294#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
295#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
296
d52fd16a 297#undef TARGET_SCHED_SMS_RES_MII
298#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
299
644459d0 300struct gcc_target targetm = TARGET_INITIALIZER;
301
5df189be 302void
303spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
304{
5df189be 305 /* Override some of the default param values. With so many registers
306 larger values are better for these params. */
307 MAX_PENDING_LIST_LENGTH = 128;
308
309 /* With so many registers this is better on by default. */
310 flag_rename_registers = 1;
311}
312
644459d0 313/* Sometimes certain combinations of command options do not make sense
314 on a particular target machine. You can define a macro
315 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
316 executed once just after all the command options have been parsed. */
317void
318spu_override_options (void)
319{
14d408d9 320 /* Small loops will be completely unrolled at -O3. For SPU it is more important
321 to keep code small by default. */
322 if (!flag_unroll_loops && !flag_peel_loops
323 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
324 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
325
644459d0 326 flag_omit_frame_pointer = 1;
327
328 if (align_functions < 8)
329 align_functions = 8;
c7b91b14 330
331 if (spu_fixed_range_string)
332 fix_range (spu_fixed_range_string);
5474166e 333
334 /* Determine processor architectural level. */
335 if (spu_arch_string)
336 {
337 if (strcmp (&spu_arch_string[0], "cell") == 0)
338 spu_arch = PROCESSOR_CELL;
339 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
340 spu_arch = PROCESSOR_CELLEDP;
341 else
342 error ("Unknown architecture '%s'", &spu_arch_string[0]);
343 }
344
345 /* Determine processor to tune for. */
346 if (spu_tune_string)
347 {
348 if (strcmp (&spu_tune_string[0], "cell") == 0)
349 spu_tune = PROCESSOR_CELL;
350 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
351 spu_tune = PROCESSOR_CELLEDP;
352 else
353 error ("Unknown tune setting '%s'", &spu_tune_string[0]);
354 }
644459d0 355}
356\f
357/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
358 struct attribute_spec.handler. */
359
360/* Table of machine attributes. */
361const struct attribute_spec spu_attribute_table[] =
362{
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
364 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
365 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
366 { NULL, 0, 0, false, false, false, NULL }
367};
368
369/* True if MODE is valid for the target. By "valid", we mean able to
370 be manipulated in non-trivial ways. In particular, this means all
371 the arithmetic is supported. */
372static bool
373spu_scalar_mode_supported_p (enum machine_mode mode)
374{
375 switch (mode)
376 {
377 case QImode:
378 case HImode:
379 case SImode:
380 case SFmode:
381 case DImode:
382 case TImode:
383 case DFmode:
384 return true;
385
386 default:
387 return false;
388 }
389}
390
391/* Similarly for vector modes. "Supported" here is less strict. At
392 least some operations are supported; need to check optabs or builtins
393 for further details. */
394static bool
395spu_vector_mode_supported_p (enum machine_mode mode)
396{
397 switch (mode)
398 {
399 case V16QImode:
400 case V8HImode:
401 case V4SImode:
402 case V2DImode:
403 case V4SFmode:
404 case V2DFmode:
405 return true;
406
407 default:
408 return false;
409 }
410}
411
412/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
413 least significant bytes of the outer mode. This function returns
414 TRUE for the SUBREGs where this is correct. */
415int
416valid_subreg (rtx op)
417{
418 enum machine_mode om = GET_MODE (op);
419 enum machine_mode im = GET_MODE (SUBREG_REG (op));
420 return om != VOIDmode && im != VOIDmode
421 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 422 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
423 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 424}
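/* For illustration: under the checks above, (subreg:SI (reg:QI)) and
   (subreg:QI (reg:SI)) are valid because both modes are at most 4 bytes,
   (subreg:V4SI (reg:TI)) is valid because both are at least 16 bytes,
   but (subreg:DI (reg:TI)) is rejected.  */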
425
426/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 427 and adjust the start offset. */
644459d0 428static rtx
429adjust_operand (rtx op, HOST_WIDE_INT * start)
430{
431 enum machine_mode mode;
432 int op_size;
38aca5eb 433 /* Strip any paradoxical SUBREG. */
434 if (GET_CODE (op) == SUBREG
435 && (GET_MODE_BITSIZE (GET_MODE (op))
436 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 437 {
438 if (start)
439 *start -=
440 GET_MODE_BITSIZE (GET_MODE (op)) -
441 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
442 op = SUBREG_REG (op);
443 }
444 /* If it is smaller than SI, make sure we end up with an SImode SUBREG. */
445 op_size = GET_MODE_BITSIZE (GET_MODE (op));
446 if (op_size < 32)
447 {
448 if (start)
449 *start += 32 - op_size;
450 op_size = 32;
451 }
452 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
453 mode = mode_for_size (op_size, MODE_INT, 0);
454 if (mode != GET_MODE (op))
455 op = gen_rtx_SUBREG (mode, op, 0);
456 return op;
457}
458
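/* Sketch of the approach used below: shift the field left so it starts at
   the most significant bit, then shift right by (src_size - width),
   arithmetically for signed extractions and logically for unsigned ones,
   to right-justify it.  For example (illustrative values), extracting an
   8-bit field at bit offset 4 of an SImode source uses a left shift by 4
   followed by a right shift by 24.  */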
459void
460spu_expand_extv (rtx ops[], int unsignedp)
461{
462 HOST_WIDE_INT width = INTVAL (ops[2]);
463 HOST_WIDE_INT start = INTVAL (ops[3]);
464 HOST_WIDE_INT src_size, dst_size;
465 enum machine_mode src_mode, dst_mode;
466 rtx dst = ops[0], src = ops[1];
467 rtx s;
468
469 dst = adjust_operand (ops[0], 0);
470 dst_mode = GET_MODE (dst);
471 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
472
644459d0 473 src = adjust_operand (src, &start);
474 src_mode = GET_MODE (src);
475 src_size = GET_MODE_BITSIZE (GET_MODE (src));
476
477 if (start > 0)
478 {
479 s = gen_reg_rtx (src_mode);
480 switch (src_mode)
481 {
482 case SImode:
483 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
484 break;
485 case DImode:
486 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
487 break;
488 case TImode:
489 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
490 break;
491 default:
492 abort ();
493 }
494 src = s;
495 }
496
497 if (width < src_size)
498 {
499 rtx pat;
500 int icode;
501 switch (src_mode)
502 {
503 case SImode:
504 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
505 break;
506 case DImode:
507 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
508 break;
509 case TImode:
510 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
511 break;
512 default:
513 abort ();
514 }
515 s = gen_reg_rtx (src_mode);
516 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
517 emit_insn (pat);
518 src = s;
519 }
520
521 convert_move (dst, src, unsignedp);
522}
523
524void
525spu_expand_insv (rtx ops[])
526{
527 HOST_WIDE_INT width = INTVAL (ops[1]);
528 HOST_WIDE_INT start = INTVAL (ops[2]);
529 HOST_WIDE_INT maskbits;
530 enum machine_mode dst_mode, src_mode;
531 rtx dst = ops[0], src = ops[3];
532 int dst_size, src_size;
533 rtx mask;
534 rtx shift_reg;
535 int shift;
536
537
538 if (GET_CODE (ops[0]) == MEM)
539 dst = gen_reg_rtx (TImode);
540 else
541 dst = adjust_operand (dst, &start);
542 dst_mode = GET_MODE (dst);
543 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
544
545 if (CONSTANT_P (src))
546 {
547 enum machine_mode m =
548 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
549 src = force_reg (m, convert_to_mode (m, src, 0));
550 }
551 src = adjust_operand (src, 0);
552 src_mode = GET_MODE (src);
553 src_size = GET_MODE_BITSIZE (GET_MODE (src));
554
555 mask = gen_reg_rtx (dst_mode);
556 shift_reg = gen_reg_rtx (dst_mode);
557 shift = dst_size - start - width;
558
559 /* It's not safe to use subreg here because the compiler assumes
560 that the SUBREG_REG is right justified in the SUBREG. */
561 convert_move (shift_reg, src, 1);
562
563 if (shift > 0)
564 {
565 switch (dst_mode)
566 {
567 case SImode:
568 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
569 break;
570 case DImode:
571 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
572 break;
573 case TImode:
574 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
575 break;
576 default:
577 abort ();
578 }
579 }
580 else if (shift < 0)
581 abort ();
582
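  /* Worked example (illustrative values): for a 32-bit destination with
     start == 8 and width == 8, the computation below yields
     maskbits == 0x00ff0000, i.e. ones exactly over the bits of the field
     being inserted, counting from the most significant end.  */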
583 switch (dst_size)
584 {
585 case 32:
586 maskbits = (-1ll << (32 - width - start));
587 if (start)
588 maskbits += (1ll << (32 - start));
589 emit_move_insn (mask, GEN_INT (maskbits));
590 break;
591 case 64:
592 maskbits = (-1ll << (64 - width - start));
593 if (start)
594 maskbits += (1ll << (64 - start));
595 emit_move_insn (mask, GEN_INT (maskbits));
596 break;
597 case 128:
598 {
599 unsigned char arr[16];
600 int i = start / 8;
601 memset (arr, 0, sizeof (arr));
602 arr[i] = 0xff >> (start & 7);
603 for (i++; i <= (start + width - 1) / 8; i++)
604 arr[i] = 0xff;
605 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
606 emit_move_insn (mask, array_to_constant (TImode, arr));
607 }
608 break;
609 default:
610 abort ();
611 }
612 if (GET_CODE (ops[0]) == MEM)
613 {
614 rtx aligned = gen_reg_rtx (SImode);
615 rtx low = gen_reg_rtx (SImode);
616 rtx addr = gen_reg_rtx (SImode);
617 rtx rotl = gen_reg_rtx (SImode);
618 rtx mask0 = gen_reg_rtx (TImode);
619 rtx mem;
620
621 emit_move_insn (addr, XEXP (ops[0], 0));
622 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
623 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
624 emit_insn (gen_negsi2 (rotl, low));
625 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
626 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
627 mem = change_address (ops[0], TImode, aligned);
628 set_mem_alias_set (mem, 0);
629 emit_move_insn (dst, mem);
630 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
631 emit_move_insn (mem, dst);
632 if (start + width > MEM_ALIGN (ops[0]))
633 {
634 rtx shl = gen_reg_rtx (SImode);
635 rtx mask1 = gen_reg_rtx (TImode);
636 rtx dst1 = gen_reg_rtx (TImode);
637 rtx mem1;
638 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
639 emit_insn (gen_shlqby_ti (mask1, mask, shl));
640 mem1 = adjust_address (mem, TImode, 16);
641 set_mem_alias_set (mem1, 0);
642 emit_move_insn (dst1, mem1);
643 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
644 emit_move_insn (mem1, dst1);
645 }
646 }
647 else
71cd778d 648 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 649}
650
651
652int
653spu_expand_block_move (rtx ops[])
654{
655 HOST_WIDE_INT bytes, align, offset;
656 rtx src, dst, sreg, dreg, target;
657 int i;
658 if (GET_CODE (ops[2]) != CONST_INT
659 || GET_CODE (ops[3]) != CONST_INT
660 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
661 return 0;
662
663 bytes = INTVAL (ops[2]);
664 align = INTVAL (ops[3]);
665
666 if (bytes <= 0)
667 return 1;
668
669 dst = ops[0];
670 src = ops[1];
671
672 if (align == 16)
673 {
674 for (offset = 0; offset + 16 <= bytes; offset += 16)
675 {
676 dst = adjust_address (ops[0], V16QImode, offset);
677 src = adjust_address (ops[1], V16QImode, offset);
678 emit_move_insn (dst, src);
679 }
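      /* A tail of fewer than 16 bytes is handled below by loading both
         quadwords, building a mask whose first (bytes - offset) bytes are
         0xff, and using selb so only those leading bytes come from the
         source while the remaining destination bytes are preserved.  */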
680 if (offset < bytes)
681 {
682 rtx mask;
683 unsigned char arr[16] = { 0 };
684 for (i = 0; i < bytes - offset; i++)
685 arr[i] = 0xff;
686 dst = adjust_address (ops[0], V16QImode, offset);
687 src = adjust_address (ops[1], V16QImode, offset);
688 mask = gen_reg_rtx (V16QImode);
689 sreg = gen_reg_rtx (V16QImode);
690 dreg = gen_reg_rtx (V16QImode);
691 target = gen_reg_rtx (V16QImode);
692 emit_move_insn (mask, array_to_constant (V16QImode, arr));
693 emit_move_insn (dreg, dst);
694 emit_move_insn (sreg, src);
695 emit_insn (gen_selb (target, dreg, sreg, mask));
696 emit_move_insn (dst, target);
697 }
698 return 1;
699 }
700 return 0;
701}
702
703enum spu_comp_code
704{ SPU_EQ, SPU_GT, SPU_GTU };
705
5474166e 706int spu_comp_icode[12][3] = {
707 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
708 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
709 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
710 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
711 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
712 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
713 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
714 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
715 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
716 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
717 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
718 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 719};
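/* The rows of spu_comp_icode are indexed by operand mode in the order used
   by spu_emit_branch_or_set below (QI, HI, SI, DI, TI, SF, DF, V16QI,
   V8HI, V4SI, V4SF, V2DF); the columns are SPU_EQ, SPU_GT and SPU_GTU.
   A zero entry means no such comparison pattern exists, e.g. there is no
   unsigned compare for the floating point modes.  */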
720
721/* Generate a compare for CODE. Return a brand-new rtx that represents
722 the result of the compare. GCC can figure this out too if we don't
723 provide all variations of compares, but because GCC always wants to use
 724 WORD_MODE, we can generate better code in most cases by doing it
 725 ourselves. */
726void
727spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
728{
729 int reverse_compare = 0;
730 int reverse_test = 0;
5d70b918 731 rtx compare_result, eq_result;
732 rtx comp_rtx, eq_rtx;
644459d0 733 rtx target = operands[0];
734 enum machine_mode comp_mode;
735 enum machine_mode op_mode;
5d70b918 736 enum spu_comp_code scode, eq_code, ior_code;
644459d0 737 int index;
5d70b918 738 int eq_test = 0;
644459d0 739
740 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
741 and so on, to keep the constant in operand 1. */
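  /* For example (illustrative values): (x >= 16) becomes (x > 15) and
     (x <u 16) becomes (x <=u 15), keeping the constant as the second
     operand of the comparison.  */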
742 if (GET_CODE (spu_compare_op1) == CONST_INT)
743 {
744 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
745 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
746 switch (code)
747 {
748 case GE:
749 spu_compare_op1 = GEN_INT (val);
750 code = GT;
751 break;
752 case LT:
753 spu_compare_op1 = GEN_INT (val);
754 code = LE;
755 break;
756 case GEU:
757 spu_compare_op1 = GEN_INT (val);
758 code = GTU;
759 break;
760 case LTU:
761 spu_compare_op1 = GEN_INT (val);
762 code = LEU;
763 break;
764 default:
765 break;
766 }
767 }
768
5d70b918 769 comp_mode = SImode;
770 op_mode = GET_MODE (spu_compare_op0);
771
644459d0 772 switch (code)
773 {
774 case GE:
644459d0 775 scode = SPU_GT;
07027691 776 if (HONOR_NANS (op_mode))
5d70b918 777 {
778 reverse_compare = 0;
779 reverse_test = 0;
780 eq_test = 1;
781 eq_code = SPU_EQ;
782 }
783 else
784 {
785 reverse_compare = 1;
786 reverse_test = 1;
787 }
644459d0 788 break;
789 case LE:
644459d0 790 scode = SPU_GT;
07027691 791 if (HONOR_NANS (op_mode))
5d70b918 792 {
793 reverse_compare = 1;
794 reverse_test = 0;
795 eq_test = 1;
796 eq_code = SPU_EQ;
797 }
798 else
799 {
800 reverse_compare = 0;
801 reverse_test = 1;
802 }
644459d0 803 break;
804 case LT:
805 reverse_compare = 1;
806 reverse_test = 0;
807 scode = SPU_GT;
808 break;
809 case GEU:
810 reverse_compare = 1;
811 reverse_test = 1;
812 scode = SPU_GTU;
813 break;
814 case LEU:
815 reverse_compare = 0;
816 reverse_test = 1;
817 scode = SPU_GTU;
818 break;
819 case LTU:
820 reverse_compare = 1;
821 reverse_test = 0;
822 scode = SPU_GTU;
823 break;
824 case NE:
825 reverse_compare = 0;
826 reverse_test = 1;
827 scode = SPU_EQ;
828 break;
829
830 case EQ:
831 scode = SPU_EQ;
832 break;
833 case GT:
834 scode = SPU_GT;
835 break;
836 case GTU:
837 scode = SPU_GTU;
838 break;
839 default:
840 scode = SPU_EQ;
841 break;
842 }
843
644459d0 844 switch (op_mode)
845 {
846 case QImode:
847 index = 0;
848 comp_mode = QImode;
849 break;
850 case HImode:
851 index = 1;
852 comp_mode = HImode;
853 break;
854 case SImode:
855 index = 2;
856 break;
857 case DImode:
858 index = 3;
859 break;
860 case TImode:
861 index = 4;
862 break;
863 case SFmode:
864 index = 5;
865 break;
866 case DFmode:
867 index = 6;
868 break;
869 case V16QImode:
5474166e 870 index = 7;
871 comp_mode = op_mode;
872 break;
644459d0 873 case V8HImode:
5474166e 874 index = 8;
875 comp_mode = op_mode;
876 break;
644459d0 877 case V4SImode:
5474166e 878 index = 9;
879 comp_mode = op_mode;
880 break;
644459d0 881 case V4SFmode:
5474166e 882 index = 10;
883 comp_mode = V4SImode;
884 break;
644459d0 885 case V2DFmode:
5474166e 886 index = 11;
887 comp_mode = V2DImode;
644459d0 888 break;
5474166e 889 case V2DImode:
644459d0 890 default:
891 abort ();
892 }
893
07027691 894 if (GET_MODE (spu_compare_op1) == DFmode
895 && (scode != SPU_GT && scode != SPU_EQ))
896 abort ();
644459d0 897
898 if (is_set == 0 && spu_compare_op1 == const0_rtx
899 && (GET_MODE (spu_compare_op0) == SImode
900 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
901 {
902 /* Don't need to set a register with the result when we are
903 comparing against zero and branching. */
904 reverse_test = !reverse_test;
905 compare_result = spu_compare_op0;
906 }
907 else
908 {
909 compare_result = gen_reg_rtx (comp_mode);
910
911 if (reverse_compare)
912 {
913 rtx t = spu_compare_op1;
914 spu_compare_op1 = spu_compare_op0;
915 spu_compare_op0 = t;
916 }
917
918 if (spu_comp_icode[index][scode] == 0)
919 abort ();
920
921 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
922 (spu_compare_op0, op_mode))
923 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
924 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
925 (spu_compare_op1, op_mode))
926 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
927 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
928 spu_compare_op0,
929 spu_compare_op1);
930 if (comp_rtx == 0)
931 abort ();
932 emit_insn (comp_rtx);
933
5d70b918 934 if (eq_test)
935 {
936 eq_result = gen_reg_rtx (comp_mode);
937 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
938 spu_compare_op0,
939 spu_compare_op1);
940 if (eq_rtx == 0)
941 abort ();
942 emit_insn (eq_rtx);
943 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
944 gcc_assert (ior_code != CODE_FOR_nothing);
945 emit_insn (GEN_FCN (ior_code)
946 (compare_result, compare_result, eq_result));
947 }
644459d0 948 }
949
950 if (is_set == 0)
951 {
952 rtx bcomp;
953 rtx loc_ref;
954
955 /* We don't have branch on QI compare insns, so we convert the
956 QI compare result to a HI result. */
957 if (comp_mode == QImode)
958 {
959 rtx old_res = compare_result;
960 compare_result = gen_reg_rtx (HImode);
961 comp_mode = HImode;
962 emit_insn (gen_extendqihi2 (compare_result, old_res));
963 }
964
965 if (reverse_test)
966 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
967 else
968 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
969
970 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
971 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
972 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
973 loc_ref, pc_rtx)));
974 }
975 else if (is_set == 2)
976 {
977 int compare_size = GET_MODE_BITSIZE (comp_mode);
978 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
979 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
980 rtx select_mask;
981 rtx op_t = operands[2];
982 rtx op_f = operands[3];
983
984 /* The result of the comparison can be SI, HI or QI mode. Create a
985 mask based on that result. */
986 if (target_size > compare_size)
987 {
988 select_mask = gen_reg_rtx (mode);
989 emit_insn (gen_extend_compare (select_mask, compare_result));
990 }
991 else if (target_size < compare_size)
992 select_mask =
993 gen_rtx_SUBREG (mode, compare_result,
994 (compare_size - target_size) / BITS_PER_UNIT);
995 else if (comp_mode != mode)
996 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
997 else
998 select_mask = compare_result;
999
1000 if (GET_MODE (target) != GET_MODE (op_t)
1001 || GET_MODE (target) != GET_MODE (op_f))
1002 abort ();
1003
1004 if (reverse_test)
1005 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1006 else
1007 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1008 }
1009 else
1010 {
1011 if (reverse_test)
1012 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1013 gen_rtx_NOT (comp_mode, compare_result)));
1014 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1015 emit_insn (gen_extendhisi2 (target, compare_result));
1016 else if (GET_MODE (target) == SImode
1017 && GET_MODE (compare_result) == QImode)
1018 emit_insn (gen_extend_compare (target, compare_result));
1019 else
1020 emit_move_insn (target, compare_result);
1021 }
1022}
1023
1024HOST_WIDE_INT
1025const_double_to_hwint (rtx x)
1026{
1027 HOST_WIDE_INT val;
1028 REAL_VALUE_TYPE rv;
1029 if (GET_MODE (x) == SFmode)
1030 {
1031 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1032 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1033 }
1034 else if (GET_MODE (x) == DFmode)
1035 {
1036 long l[2];
1037 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1038 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1039 val = l[0];
1040 val = (val << 32) | (l[1] & 0xffffffff);
1041 }
1042 else
1043 abort ();
1044 return val;
1045}
1046
1047rtx
1048hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1049{
1050 long tv[2];
1051 REAL_VALUE_TYPE rv;
1052 gcc_assert (mode == SFmode || mode == DFmode);
1053
1054 if (mode == SFmode)
1055 tv[0] = (v << 32) >> 32;
1056 else if (mode == DFmode)
1057 {
1058 tv[1] = (v << 32) >> 32;
1059 tv[0] = v >> 32;
1060 }
1061 real_from_target (&rv, tv, mode);
1062 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1063}
1064
1065void
1066print_operand_address (FILE * file, register rtx addr)
1067{
1068 rtx reg;
1069 rtx offset;
1070
e04cf423 1071 if (GET_CODE (addr) == AND
1072 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1073 && INTVAL (XEXP (addr, 1)) == -16)
1074 addr = XEXP (addr, 0);
1075
644459d0 1076 switch (GET_CODE (addr))
1077 {
1078 case REG:
1079 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1080 break;
1081
1082 case PLUS:
1083 reg = XEXP (addr, 0);
1084 offset = XEXP (addr, 1);
1085 if (GET_CODE (offset) == REG)
1086 {
1087 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1088 reg_names[REGNO (offset)]);
1089 }
1090 else if (GET_CODE (offset) == CONST_INT)
1091 {
1092 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1093 INTVAL (offset), reg_names[REGNO (reg)]);
1094 }
1095 else
1096 abort ();
1097 break;
1098
1099 case CONST:
1100 case LABEL_REF:
1101 case SYMBOL_REF:
1102 case CONST_INT:
1103 output_addr_const (file, addr);
1104 break;
1105
1106 default:
1107 debug_rtx (addr);
1108 abort ();
1109 }
1110}
1111
1112void
1113print_operand (FILE * file, rtx x, int code)
1114{
1115 enum machine_mode mode = GET_MODE (x);
1116 HOST_WIDE_INT val;
1117 unsigned char arr[16];
1118 int xcode = GET_CODE (x);
dea01258 1119 int i, info;
644459d0 1120 if (GET_MODE (x) == VOIDmode)
1121 switch (code)
1122 {
644459d0 1123 case 'L': /* 128 bits, signed */
1124 case 'm': /* 128 bits, signed */
1125 case 'T': /* 128 bits, signed */
1126 case 't': /* 128 bits, signed */
1127 mode = TImode;
1128 break;
644459d0 1129 case 'K': /* 64 bits, signed */
1130 case 'k': /* 64 bits, signed */
1131 case 'D': /* 64 bits, signed */
1132 case 'd': /* 64 bits, signed */
1133 mode = DImode;
1134 break;
644459d0 1135 case 'J': /* 32 bits, signed */
1136 case 'j': /* 32 bits, signed */
1137 case 's': /* 32 bits, signed */
1138 case 'S': /* 32 bits, signed */
1139 mode = SImode;
1140 break;
1141 }
1142 switch (code)
1143 {
1144
1145 case 'j': /* 32 bits, signed */
1146 case 'k': /* 64 bits, signed */
1147 case 'm': /* 128 bits, signed */
1148 if (xcode == CONST_INT
1149 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1150 {
1151 gcc_assert (logical_immediate_p (x, mode));
1152 constant_to_array (mode, x, arr);
1153 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1154 val = trunc_int_for_mode (val, SImode);
1155 switch (which_logical_immediate (val))
1156 {
1157 case SPU_ORI:
1158 break;
1159 case SPU_ORHI:
1160 fprintf (file, "h");
1161 break;
1162 case SPU_ORBI:
1163 fprintf (file, "b");
1164 break;
1165 default:
1166 gcc_unreachable();
1167 }
1168 }
1169 else
1170 gcc_unreachable();
1171 return;
1172
1173 case 'J': /* 32 bits, signed */
1174 case 'K': /* 64 bits, signed */
1175 case 'L': /* 128 bits, signed */
1176 if (xcode == CONST_INT
1177 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1178 {
1179 gcc_assert (logical_immediate_p (x, mode)
1180 || iohl_immediate_p (x, mode));
1181 constant_to_array (mode, x, arr);
1182 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1183 val = trunc_int_for_mode (val, SImode);
1184 switch (which_logical_immediate (val))
1185 {
1186 case SPU_ORI:
1187 case SPU_IOHL:
1188 break;
1189 case SPU_ORHI:
1190 val = trunc_int_for_mode (val, HImode);
1191 break;
1192 case SPU_ORBI:
1193 val = trunc_int_for_mode (val, QImode);
1194 break;
1195 default:
1196 gcc_unreachable();
1197 }
1198 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1199 }
1200 else
1201 gcc_unreachable();
1202 return;
1203
1204 case 't': /* 128 bits, signed */
1205 case 'd': /* 64 bits, signed */
1206 case 's': /* 32 bits, signed */
dea01258 1207 if (CONSTANT_P (x))
644459d0 1208 {
dea01258 1209 enum immediate_class c = classify_immediate (x, mode);
1210 switch (c)
1211 {
1212 case IC_IL1:
1213 constant_to_array (mode, x, arr);
1214 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1215 val = trunc_int_for_mode (val, SImode);
1216 switch (which_immediate_load (val))
1217 {
1218 case SPU_IL:
1219 break;
1220 case SPU_ILA:
1221 fprintf (file, "a");
1222 break;
1223 case SPU_ILH:
1224 fprintf (file, "h");
1225 break;
1226 case SPU_ILHU:
1227 fprintf (file, "hu");
1228 break;
1229 default:
1230 gcc_unreachable ();
1231 }
1232 break;
1233 case IC_CPAT:
1234 constant_to_array (mode, x, arr);
1235 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1236 if (info == 1)
1237 fprintf (file, "b");
1238 else if (info == 2)
1239 fprintf (file, "h");
1240 else if (info == 4)
1241 fprintf (file, "w");
1242 else if (info == 8)
1243 fprintf (file, "d");
1244 break;
1245 case IC_IL1s:
1246 if (xcode == CONST_VECTOR)
1247 {
1248 x = CONST_VECTOR_ELT (x, 0);
1249 xcode = GET_CODE (x);
1250 }
1251 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1252 fprintf (file, "a");
1253 else if (xcode == HIGH)
1254 fprintf (file, "hu");
1255 break;
1256 case IC_FSMBI:
5df189be 1257 case IC_FSMBI2:
dea01258 1258 case IC_IL2:
1259 case IC_IL2s:
1260 case IC_POOL:
1261 abort ();
1262 }
644459d0 1263 }
644459d0 1264 else
1265 gcc_unreachable ();
1266 return;
1267
1268 case 'T': /* 128 bits, signed */
1269 case 'D': /* 64 bits, signed */
1270 case 'S': /* 32 bits, signed */
dea01258 1271 if (CONSTANT_P (x))
644459d0 1272 {
dea01258 1273 enum immediate_class c = classify_immediate (x, mode);
1274 switch (c)
644459d0 1275 {
dea01258 1276 case IC_IL1:
1277 constant_to_array (mode, x, arr);
1278 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1279 val = trunc_int_for_mode (val, SImode);
1280 switch (which_immediate_load (val))
1281 {
1282 case SPU_IL:
1283 case SPU_ILA:
1284 break;
1285 case SPU_ILH:
1286 case SPU_ILHU:
1287 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1288 break;
1289 default:
1290 gcc_unreachable ();
1291 }
1292 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1293 break;
1294 case IC_FSMBI:
1295 constant_to_array (mode, x, arr);
1296 val = 0;
1297 for (i = 0; i < 16; i++)
1298 {
1299 val <<= 1;
1300 val |= arr[i] & 1;
1301 }
1302 print_operand (file, GEN_INT (val), 0);
1303 break;
1304 case IC_CPAT:
1305 constant_to_array (mode, x, arr);
1306 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1308 break;
dea01258 1309 case IC_IL1s:
dea01258 1310 if (xcode == HIGH)
5df189be 1311 x = XEXP (x, 0);
1312 if (GET_CODE (x) == CONST_VECTOR)
1313 x = CONST_VECTOR_ELT (x, 0);
1314 output_addr_const (file, x);
1315 if (xcode == HIGH)
1316 fprintf (file, "@h");
644459d0 1317 break;
dea01258 1318 case IC_IL2:
1319 case IC_IL2s:
5df189be 1320 case IC_FSMBI2:
dea01258 1321 case IC_POOL:
1322 abort ();
644459d0 1323 }
c8befdb9 1324 }
644459d0 1325 else
1326 gcc_unreachable ();
1327 return;
1328
644459d0 1329 case 'C':
1330 if (xcode == CONST_INT)
1331 {
1332 /* Only the 4 least significant bits are relevant for generating
1333 control word instructions. */
1334 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1335 return;
1336 }
1337 break;
1338
1339 case 'M': /* print code for c*d */
1340 if (GET_CODE (x) == CONST_INT)
1341 switch (INTVAL (x))
1342 {
1343 case 1:
1344 fprintf (file, "b");
1345 break;
1346 case 2:
1347 fprintf (file, "h");
1348 break;
1349 case 4:
1350 fprintf (file, "w");
1351 break;
1352 case 8:
1353 fprintf (file, "d");
1354 break;
1355 default:
1356 gcc_unreachable();
1357 }
1358 else
1359 gcc_unreachable();
1360 return;
1361
1362 case 'N': /* Negate the operand */
1363 if (xcode == CONST_INT)
1364 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1365 else if (xcode == CONST_VECTOR)
1366 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1367 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1368 return;
1369
1370 case 'I': /* enable/disable interrupts */
1371 if (xcode == CONST_INT)
1372 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1373 return;
1374
1375 case 'b': /* branch modifiers */
1376 if (xcode == REG)
1377 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1378 else if (COMPARISON_P (x))
1379 fprintf (file, "%s", xcode == NE ? "n" : "");
1380 return;
1381
1382 case 'i': /* indirect call */
1383 if (xcode == MEM)
1384 {
1385 if (GET_CODE (XEXP (x, 0)) == REG)
1386 /* Used in indirect function calls. */
1387 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1388 else
1389 output_address (XEXP (x, 0));
1390 }
1391 return;
1392
1393 case 'p': /* load/store */
1394 if (xcode == MEM)
1395 {
1396 x = XEXP (x, 0);
1397 xcode = GET_CODE (x);
1398 }
e04cf423 1399 if (xcode == AND)
1400 {
1401 x = XEXP (x, 0);
1402 xcode = GET_CODE (x);
1403 }
644459d0 1404 if (xcode == REG)
1405 fprintf (file, "d");
1406 else if (xcode == CONST_INT)
1407 fprintf (file, "a");
1408 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1409 fprintf (file, "r");
1410 else if (xcode == PLUS || xcode == LO_SUM)
1411 {
1412 if (GET_CODE (XEXP (x, 1)) == REG)
1413 fprintf (file, "x");
1414 else
1415 fprintf (file, "d");
1416 }
1417 return;
1418
5df189be 1419 case 'e':
1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 val &= 0x7;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
1425 case 'f':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val &= 0x1f;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1430
1431 case 'g':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val &= 0x3f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1436
1437 case 'h':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val = (val >> 3) & 0x1f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1442
1443 case 'E':
1444 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1445 val = -val;
1446 val &= 0x7;
1447 output_addr_const (file, GEN_INT (val));
1448 return;
1449
1450 case 'F':
1451 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1452 val = -val;
1453 val &= 0x1f;
1454 output_addr_const (file, GEN_INT (val));
1455 return;
1456
1457 case 'G':
1458 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1459 val = -val;
1460 val &= 0x3f;
1461 output_addr_const (file, GEN_INT (val));
1462 return;
1463
1464 case 'H':
1465 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1466 val = -(val & -8ll);
1467 val = (val >> 3) & 0x1f;
1468 output_addr_const (file, GEN_INT (val));
1469 return;
1470
644459d0 1471 case 0:
1472 if (xcode == REG)
1473 fprintf (file, "%s", reg_names[REGNO (x)]);
1474 else if (xcode == MEM)
1475 output_address (XEXP (x, 0));
1476 else if (xcode == CONST_VECTOR)
dea01258 1477 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1478 else
1479 output_addr_const (file, x);
1480 return;
1481
f6a0d06f 1482 /* unused letters
5df189be 1483 o qr uvw yz
1484 AB OPQR UVWXYZ */
644459d0 1485 default:
1486 output_operand_lossage ("invalid %%xn code");
1487 }
1488 gcc_unreachable ();
1489}
1490
1491extern char call_used_regs[];
644459d0 1492
1493/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1494 caller saved register. For leaf functions it is more efficient to
1495 use a volatile register because we won't need to save and restore the
1496 pic register. This routine is only valid after register allocation
1497 is completed, so we can pick an unused register. */
1498static rtx
1499get_pic_reg (void)
1500{
1501 rtx pic_reg = pic_offset_table_rtx;
1502 if (!reload_completed && !reload_in_progress)
1503 abort ();
1504 return pic_reg;
1505}
1506
5df189be 1507/* Split constant addresses to handle cases that are too large.
1508 Add in the pic register when in PIC mode.
1509 Split immediates that require more than 1 instruction. */
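/* Illustrative example: a 32-bit constant such as 0x12345678 that cannot be
   loaded with a single il/ila/ilhu or fsmbi instruction is classified as
   IC_IL2 and is split below into a move of the high halfwords followed by
   an IOR of the low halfwords, presumably matching the ilhu/iohl pair
   (0x1234 then 0x5678 in this example).  */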
dea01258 1510int
1511spu_split_immediate (rtx * ops)
c8befdb9 1512{
dea01258 1513 enum machine_mode mode = GET_MODE (ops[0]);
1514 enum immediate_class c = classify_immediate (ops[1], mode);
1515
1516 switch (c)
c8befdb9 1517 {
dea01258 1518 case IC_IL2:
1519 {
1520 unsigned char arrhi[16];
1521 unsigned char arrlo[16];
1522 rtx to, hi, lo;
1523 int i;
1524 constant_to_array (mode, ops[1], arrhi);
e1ba4a27 1525 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
dea01258 1526 for (i = 0; i < 16; i += 4)
1527 {
1528 arrlo[i + 2] = arrhi[i + 2];
1529 arrlo[i + 3] = arrhi[i + 3];
1530 arrlo[i + 0] = arrlo[i + 1] = 0;
1531 arrhi[i + 2] = arrhi[i + 3] = 0;
1532 }
1533 hi = array_to_constant (mode, arrhi);
1534 lo = array_to_constant (mode, arrlo);
1535 emit_move_insn (to, hi);
1536 emit_insn (gen_rtx_SET
1537 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1538 return 1;
1539 }
5df189be 1540 case IC_FSMBI2:
1541 {
1542 unsigned char arr_fsmbi[16];
1543 unsigned char arr_andbi[16];
1544 rtx to, reg_fsmbi, reg_and;
1545 int i;
1546 enum machine_mode imode = mode;
1547 /* We need to do reals as ints because the constant used in the
1548 AND might not be a legitimate real constant. */
1549 imode = int_mode_for_mode (mode);
1550 constant_to_array (mode, ops[1], arr_fsmbi);
1551 if (imode != mode)
1552 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1553 else
1554 to = ops[0];
1555 for (i = 0; i < 16; i++)
1556 if (arr_fsmbi[i] != 0)
1557 {
1558 arr_andbi[0] = arr_fsmbi[i];
1559 arr_fsmbi[i] = 0xff;
1560 }
1561 for (i = 1; i < 16; i++)
1562 arr_andbi[i] = arr_andbi[0];
1563 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1564 reg_and = array_to_constant (imode, arr_andbi);
1565 emit_move_insn (to, reg_fsmbi);
1566 emit_insn (gen_rtx_SET
1567 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1568 return 1;
1569 }
dea01258 1570 case IC_POOL:
1571 if (reload_in_progress || reload_completed)
1572 {
1573 rtx mem = force_const_mem (mode, ops[1]);
1574 if (TARGET_LARGE_MEM)
1575 {
1576 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1577 emit_move_insn (addr, XEXP (mem, 0));
1578 mem = replace_equiv_address (mem, addr);
1579 }
1580 emit_move_insn (ops[0], mem);
1581 return 1;
1582 }
1583 break;
1584 case IC_IL1s:
1585 case IC_IL2s:
1586 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1587 {
1588 if (c == IC_IL2s)
1589 {
5df189be 1590 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1591 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1592 }
1593 else if (flag_pic)
1594 emit_insn (gen_pic (ops[0], ops[1]));
1595 if (flag_pic)
1596 {
1597 rtx pic_reg = get_pic_reg ();
1598 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1599 crtl->uses_pic_offset_table = 1;
dea01258 1600 }
1601 return flag_pic || c == IC_IL2s;
1602 }
1603 break;
1604 case IC_IL1:
1605 case IC_FSMBI:
1606 case IC_CPAT:
1607 break;
c8befdb9 1608 }
dea01258 1609 return 0;
c8befdb9 1610}
1611
644459d0 1612/* SAVING is TRUE when we are generating the actual load and store
1613 instructions for REGNO. When determining the size of the stack
1614 needed for saving registers we must allocate enough space for the
1615 worst case, because we don't always have the information early enough
1616 to not allocate it. But we can at least eliminate the actual loads
1617 and stores during the prologue/epilogue. */
1618static int
1619need_to_save_reg (int regno, int saving)
1620{
3072d30e 1621 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1622 return 1;
1623 if (flag_pic
1624 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1625 && (!saving || crtl->uses_pic_offset_table)
644459d0 1626 && (!saving
3072d30e 1627 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1628 return 1;
1629 return 0;
1630}
1631
1632/* This function is only correct starting with local register
1633 allocation */
1634int
1635spu_saved_regs_size (void)
1636{
1637 int reg_save_size = 0;
1638 int regno;
1639
1640 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1641 if (need_to_save_reg (regno, 0))
1642 reg_save_size += 0x10;
1643 return reg_save_size;
1644}
1645
1646static rtx
1647frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1648{
1649 rtx reg = gen_rtx_REG (V4SImode, regno);
1650 rtx mem =
1651 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1652 return emit_insn (gen_movv4si (mem, reg));
1653}
1654
1655static rtx
1656frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1657{
1658 rtx reg = gen_rtx_REG (V4SImode, regno);
1659 rtx mem =
1660 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1661 return emit_insn (gen_movv4si (reg, mem));
1662}
1663
1664/* This happens after reload, so we need to expand it. */
1665static rtx
1666frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1667{
1668 rtx insn;
1669 if (satisfies_constraint_K (GEN_INT (imm)))
1670 {
1671 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1672 }
1673 else
1674 {
3072d30e 1675 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1676 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1677 if (REGNO (src) == REGNO (scratch))
1678 abort ();
1679 }
644459d0 1680 return insn;
1681}
1682
1683/* Return nonzero if this function is known to have a null epilogue. */
1684
1685int
1686direct_return (void)
1687{
1688 if (reload_completed)
1689 {
1690 if (cfun->static_chain_decl == 0
1691 && (spu_saved_regs_size ()
1692 + get_frame_size ()
abe32cce 1693 + crtl->outgoing_args_size
1694 + crtl->args.pretend_args_size == 0)
644459d0 1695 && current_function_is_leaf)
1696 return 1;
1697 }
1698 return 0;
1699}
1700
1701/*
1702 The stack frame looks like this:
1703 +-------------+
1704 | incoming |
1705 AP | args |
1706 +-------------+
1707 | $lr save |
1708 +-------------+
1709 prev SP | back chain |
1710 +-------------+
1711 | var args |
abe32cce 1712 | reg save | crtl->args.pretend_args_size bytes
644459d0 1713 +-------------+
1714 | ... |
1715 | saved regs | spu_saved_regs_size() bytes
1716 +-------------+
1717 | ... |
1718 FP | vars | get_frame_size() bytes
1719 +-------------+
1720 | ... |
1721 | outgoing |
abe32cce 1722 | args | crtl->outgoing_args_size bytes
644459d0 1723 +-------------+
1724 | $lr of next |
1725 | frame |
1726 +-------------+
1727 SP | back chain |
1728 +-------------+
1729
1730*/
1731void
1732spu_expand_prologue (void)
1733{
1734 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1735 HOST_WIDE_INT total_size;
1736 HOST_WIDE_INT saved_regs_size;
1737 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1738 rtx scratch_reg_0, scratch_reg_1;
1739 rtx insn, real;
1740
1741 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1742 the "toplevel" insn chain. */
1743 emit_note (NOTE_INSN_DELETED);
1744
1745 if (flag_pic && optimize == 0)
18d50ae6 1746 crtl->uses_pic_offset_table = 1;
644459d0 1747
1748 if (spu_naked_function_p (current_function_decl))
1749 return;
1750
1751 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1752 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1753
1754 saved_regs_size = spu_saved_regs_size ();
1755 total_size = size + saved_regs_size
abe32cce 1756 + crtl->outgoing_args_size
1757 + crtl->args.pretend_args_size;
644459d0 1758
1759 if (!current_function_is_leaf
18d50ae6 1760 || cfun->calls_alloca || total_size > 0)
644459d0 1761 total_size += STACK_POINTER_OFFSET;
1762
1763 /* Save this first because code after this might use the link
1764 register as a scratch register. */
1765 if (!current_function_is_leaf)
1766 {
1767 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1768 RTX_FRAME_RELATED_P (insn) = 1;
1769 }
1770
1771 if (total_size > 0)
1772 {
abe32cce 1773 offset = -crtl->args.pretend_args_size;
644459d0 1774 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1775 if (need_to_save_reg (regno, 1))
1776 {
1777 offset -= 16;
1778 insn = frame_emit_store (regno, sp_reg, offset);
1779 RTX_FRAME_RELATED_P (insn) = 1;
1780 }
1781 }
1782
18d50ae6 1783 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1784 {
1785 rtx pic_reg = get_pic_reg ();
1786 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1787 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1788 }
1789
1790 if (total_size > 0)
1791 {
1792 if (flag_stack_check)
1793 {
d819917f 1794 /* We compare against total_size-1 because
644459d0 1795 ($sp >= total_size) <=> ($sp > total_size-1) */
1796 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1797 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1798 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1799 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1800 {
1801 emit_move_insn (scratch_v4si, size_v4si);
1802 size_v4si = scratch_v4si;
1803 }
1804 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1805 emit_insn (gen_vec_extractv4si
1806 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1807 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1808 }
1809
1810 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1811 the value of the previous $sp because we save it as the back
1812 chain. */
1813 if (total_size <= 2000)
1814 {
1815 /* In this case we save the back chain first. */
1816 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1817 insn =
1818 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1819 }
1820 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1821 {
1822 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1823 insn =
1824 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1825 }
1826 else
1827 {
1828 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1829 insn =
1830 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1831 }
1832 RTX_FRAME_RELATED_P (insn) = 1;
1833 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1834 REG_NOTES (insn) =
1835 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1836
1837 if (total_size > 2000)
1838 {
1839 /* Save the back chain ptr */
1840 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1841 }
1842
1843 if (frame_pointer_needed)
1844 {
1845 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1846 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1847 + crtl->outgoing_args_size;
644459d0 1848 /* Set the new frame_pointer */
d8dfeb55 1849 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1850 RTX_FRAME_RELATED_P (insn) = 1;
1851 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1852 REG_NOTES (insn) =
1853 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1854 real, REG_NOTES (insn));
5df189be 1855 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1856 }
1857 }
1858
1859 emit_note (NOTE_INSN_DELETED);
1860}
1861
1862void
1863spu_expand_epilogue (bool sibcall_p)
1864{
1865 int size = get_frame_size (), offset, regno;
1866 HOST_WIDE_INT saved_regs_size, total_size;
1867 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1868 rtx jump, scratch_reg_0;
1869
1870 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1871 the "toplevel" insn chain. */
1872 emit_note (NOTE_INSN_DELETED);
1873
1874 if (spu_naked_function_p (current_function_decl))
1875 return;
1876
1877 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1878
1879 saved_regs_size = spu_saved_regs_size ();
1880 total_size = size + saved_regs_size
abe32cce 1881 + crtl->outgoing_args_size
1882 + crtl->args.pretend_args_size;
644459d0 1883
1884 if (!current_function_is_leaf
18d50ae6 1885 || cfun->calls_alloca || total_size > 0)
644459d0 1886 total_size += STACK_POINTER_OFFSET;
1887
1888 if (total_size > 0)
1889 {
18d50ae6 1890 if (cfun->calls_alloca)
644459d0 1891 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1892 else
1893 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1894
1895
1896 if (saved_regs_size > 0)
1897 {
abe32cce 1898 offset = -crtl->args.pretend_args_size;
644459d0 1899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1900 if (need_to_save_reg (regno, 1))
1901 {
1902 offset -= 0x10;
1903 frame_emit_load (regno, sp_reg, offset);
1904 }
1905 }
1906 }
1907
1908 if (!current_function_is_leaf)
1909 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1910
1911 if (!sibcall_p)
1912 {
18b42941 1913 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1914 jump = emit_jump_insn (gen__return ());
1915 emit_barrier_after (jump);
1916 }
1917
1918 emit_note (NOTE_INSN_DELETED);
1919}
1920
1921rtx
1922spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1923{
1924 if (count != 0)
1925 return 0;
1926 /* This is inefficient because it ends up copying to a save-register
1927 which then gets saved even though $lr has already been saved. But
1928 it does generate better code for leaf functions and we don't need
1929 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1930 used for __builtin_return_address anyway, so maybe we don't care if
1931 it's inefficient. */
1932 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1933}
1934\f
1935
1936/* Given VAL, generate a constant appropriate for MODE.
1937 If MODE is a vector mode, every element will be VAL.
1938 For TImode, VAL will be zero extended to 128 bits. */
1939rtx
1940spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1941{
1942 rtx inner;
1943 rtvec v;
1944 int units, i;
1945
1946 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1947 || GET_MODE_CLASS (mode) == MODE_FLOAT
1948 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1949 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1950
1951 if (GET_MODE_CLASS (mode) == MODE_INT)
1952 return immed_double_const (val, 0, mode);
1953
1954 /* val is the bit representation of the float */
1955 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1956 return hwint_to_const_double (mode, val);
1957
1958 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1959 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1960 else
1961 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1962
1963 units = GET_MODE_NUNITS (mode);
1964
1965 v = rtvec_alloc (units);
1966
1967 for (i = 0; i < units; ++i)
1968 RTVEC_ELT (v, i) = inner;
1969
1970 return gen_rtx_CONST_VECTOR (mode, v);
1971}
1972\f
1973/* branch hint stuff */
1974
1975/* The hardware requires 8 insns between a hint and the branch it
 1976 affects. This variable describes how many rtl instructions the
1977 compiler needs to see before inserting a hint. (FIXME: We should
1978 accept less and insert nops to enforce it because hinting is always
1979 profitable for performance, but we do need to be careful of code
1980 size.) */
1981int spu_hint_dist = (8 * 4);
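
/* Illustrative sketch (not part of the original spu.c): the distance
   test implied by spu_hint_dist, written out on plain byte addresses.
   Eight 4-byte instructions must separate the hint from its branch, so
   a hint only helps when the branch is at least 32 bytes away.  The
   function name is made up for the example and the code is guarded out
   of the build.  */
#if 0
static int
example_hint_worthwhile_p (int branch_addr, int hint_addr)
{
  /* 8 insns * 4 bytes per insn.  */
  return branch_addr - hint_addr >= 8 * 4;
}
#endif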
1982
5474166e 1983/* Create a MODE vector constant from 4 ints. */
1984rtx
1985spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1986{
1987 unsigned char arr[16];
1988 arr[0] = (a >> 24) & 0xff;
1989 arr[1] = (a >> 16) & 0xff;
1990 arr[2] = (a >> 8) & 0xff;
1991 arr[3] = (a >> 0) & 0xff;
1992 arr[4] = (b >> 24) & 0xff;
1993 arr[5] = (b >> 16) & 0xff;
1994 arr[6] = (b >> 8) & 0xff;
1995 arr[7] = (b >> 0) & 0xff;
1996 arr[8] = (c >> 24) & 0xff;
1997 arr[9] = (c >> 16) & 0xff;
1998 arr[10] = (c >> 8) & 0xff;
1999 arr[11] = (c >> 0) & 0xff;
2000 arr[12] = (d >> 24) & 0xff;
2001 arr[13] = (d >> 16) & 0xff;
2002 arr[14] = (d >> 8) & 0xff;
2003 arr[15] = (d >> 0) & 0xff;
2004 return array_to_constant(mode, arr);
2005}
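
/* Illustrative sketch (not part of the original spu.c): the packing
   above is just big-endian byte order, one 32-bit word per vector
   slot.  A self-contained equivalent in plain C, with a made-up name
   and guarded out of the build:  */
#if 0
#include <stdint.h>

static void
example_pack_be32x4 (uint8_t out[16], uint32_t a, uint32_t b,
                     uint32_t c, uint32_t d)
{
  uint32_t w[4] = { a, b, c, d };
  int i;
  for (i = 0; i < 4; i++)
    {
      out[4 * i + 0] = (w[i] >> 24) & 0xff;   /* most significant byte first */
      out[4 * i + 1] = (w[i] >> 16) & 0xff;
      out[4 * i + 2] = (w[i] >> 8) & 0xff;
      out[4 * i + 3] = w[i] & 0xff;
    }
}
#endif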
2006
644459d0 2007/* An array of these is used to propagate hints to predecessor blocks. */
2008struct spu_bb_info
2009{
fa7637bd 2010 rtx prop_jump; /* propagated from another block */
2011 basic_block bb; /* the original block. */
644459d0 2012};
2013
2014/* The special $hbr register is used to prevent the insn scheduler from
2015 moving hbr insns across instructions which invalidate them. It
2016 should only be used in a clobber, and this function searches for
2017 insns which clobber it. */
2018static bool
2019insn_clobbers_hbr (rtx insn)
2020{
2021 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2022 {
2023 rtx parallel = PATTERN (insn);
2024 rtx clobber;
2025 int j;
2026 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2027 {
2028 clobber = XVECEXP (parallel, 0, j);
2029 if (GET_CODE (clobber) == CLOBBER
2030 && GET_CODE (XEXP (clobber, 0)) == REG
2031 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2032 return 1;
2033 }
2034 }
2035 return 0;
2036}
2037
2038static void
2039spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2040{
2041 rtx branch_label;
2042 rtx hint, insn, prev, next;
2043
2044 if (before == 0 || branch == 0 || target == 0)
2045 return;
2046
2047 if (distance > 600)
2048 return;
2049
2050
2051 branch_label = gen_label_rtx ();
2052 LABEL_NUSES (branch_label)++;
2053 LABEL_PRESERVE_P (branch_label) = 1;
2054 insn = emit_label_before (branch_label, branch);
2055 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2056
2057 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2058 the current insn is pipe0, dual issue with it. */
2059 prev = prev_active_insn (before);
2060 if (prev && get_pipe (prev) == 0)
2061 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2062 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2063 {
2064 next = next_active_insn (before);
2065 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2066 if (next)
2067 PUT_MODE (next, TImode);
2068 }
2069 else
2070 {
2071 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2072 PUT_MODE (hint, TImode);
2073 }
2074 recog_memoized (hint);
2075}
2076
2077/* Returns 0 if we don't want a hint for this branch. Otherwise return
2078 the rtx for the branch target. */
2079static rtx
2080get_branch_target (rtx branch)
2081{
2082 if (GET_CODE (branch) == JUMP_INSN)
2083 {
2084 rtx set, src;
2085
2086 /* Return statements */
2087 if (GET_CODE (PATTERN (branch)) == RETURN)
2088 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2089
2090 /* jump table */
2091 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2092 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2093 return 0;
2094
2095 set = single_set (branch);
2096 src = SET_SRC (set);
2097 if (GET_CODE (SET_DEST (set)) != PC)
2098 abort ();
2099
2100 if (GET_CODE (src) == IF_THEN_ELSE)
2101 {
2102 rtx lab = 0;
2103 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2104 if (note)
2105 {
2106 /* If the more probable case is not a fall through, then
2107 try a branch hint. */
2108 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2109 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2110 && GET_CODE (XEXP (src, 1)) != PC)
2111 lab = XEXP (src, 1);
2112 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2113 && GET_CODE (XEXP (src, 2)) != PC)
2114 lab = XEXP (src, 2);
2115 }
2116 if (lab)
2117 {
2118 if (GET_CODE (lab) == RETURN)
2119 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2120 return lab;
2121 }
2122 return 0;
2123 }
2124
2125 return src;
2126 }
2127 else if (GET_CODE (branch) == CALL_INSN)
2128 {
2129 rtx call;
2130 /* All of our call patterns are in a PARALLEL and the CALL is
2131 the first pattern in the PARALLEL. */
2132 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2133 abort ();
2134 call = XVECEXP (PATTERN (branch), 0, 0);
2135 if (GET_CODE (call) == SET)
2136 call = SET_SRC (call);
2137 if (GET_CODE (call) != CALL)
2138 abort ();
2139 return XEXP (XEXP (call, 0), 0);
2140 }
2141 return 0;
2142}
2143
2144static void
2145insert_branch_hints (void)
2146{
2147 struct spu_bb_info *spu_bb_info;
2148 rtx branch, insn, next;
2149 rtx branch_target = 0;
2150 int branch_addr = 0, insn_addr, head_addr;
2151 basic_block bb;
2152 unsigned int j;
2153
2154 spu_bb_info =
2155 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2156 sizeof (struct spu_bb_info));
2157
2158 /* We need exact insn addresses and lengths. */
2159 shorten_branches (get_insns ());
2160
2161 FOR_EACH_BB_REVERSE (bb)
2162 {
2163 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2164 branch = 0;
2165 if (spu_bb_info[bb->index].prop_jump)
2166 {
2167 branch = spu_bb_info[bb->index].prop_jump;
2168 branch_target = get_branch_target (branch);
2169 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2170 }
2171 /* Search from end of a block to beginning. In this loop, find
 2172 jumps which need a branch hint and emit the hint only when:
2173 - it's an indirect branch and we're at the insn which sets
2174 the register
2175 - we're at an insn that will invalidate the hint. e.g., a
2176 call, another hint insn, inline asm that clobbers $hbr, and
2177 some inlined operations (divmodsi4). Don't consider jumps
2178 because they are only at the end of a block and are
2179 considered when we are deciding whether to propagate
2180 - we're getting too far away from the branch. The hbr insns
5b865faf 2181 only have a signed 10-bit offset
644459d0 2182 We go back as far as possible so the branch will be considered
2183 for propagation when we get to the beginning of the block. */
2184 next = 0;
2185 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2186 {
2187 if (INSN_P (insn))
2188 {
2189 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2190 if (branch && next
2191 && ((GET_CODE (branch_target) == REG
2192 && set_of (branch_target, insn) != NULL_RTX)
2193 || insn_clobbers_hbr (insn)
2194 || branch_addr - insn_addr > 600))
2195 {
2196 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2197 if (insn != BB_END (bb)
2198 && branch_addr - next_addr >= spu_hint_dist)
2199 {
2200 if (dump_file)
2201 fprintf (dump_file,
2202 "hint for %i in block %i before %i\n",
2203 INSN_UID (branch), bb->index, INSN_UID (next));
2204 spu_emit_branch_hint (next, branch, branch_target,
2205 branch_addr - next_addr);
2206 }
2207 branch = 0;
2208 }
2209
2210 /* JUMP_P will only be true at the end of a block. When
2211 branch is already set it means we've previously decided
2212 to propagate a hint for that branch into this block. */
2213 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2214 {
2215 branch = 0;
2216 if ((branch_target = get_branch_target (insn)))
2217 {
2218 branch = insn;
2219 branch_addr = insn_addr;
2220 }
2221 }
2222
2223 /* When a branch hint is emitted it will be inserted
2224 before "next". Make sure next is the beginning of a
2225 cycle to minimize impact on the scheduled insns. */
2226 if (GET_MODE (insn) == TImode)
2227 next = insn;
2228 }
2229 if (insn == BB_HEAD (bb))
2230 break;
2231 }
2232
2233 if (branch)
2234 {
2235 /* If we haven't emitted a hint for this branch yet, it might
2236 be profitable to emit it in one of the predecessor blocks,
2237 especially for loops. */
2238 rtx bbend;
2239 basic_block prev = 0, prop = 0, prev2 = 0;
2240 int loop_exit = 0, simple_loop = 0;
2241 int next_addr = 0;
2242 if (next)
2243 next_addr = INSN_ADDRESSES (INSN_UID (next));
2244
2245 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2246 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2247 prev = EDGE_PRED (bb, j)->src;
2248 else
2249 prev2 = EDGE_PRED (bb, j)->src;
2250
2251 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2252 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2253 loop_exit = 1;
2254 else if (EDGE_SUCC (bb, j)->dest == bb)
2255 simple_loop = 1;
2256
2257 /* If this branch is a loop exit then propagate to previous
2258 fallthru block. This catches the cases when it is a simple
2259 loop or when there is an initial branch into the loop. */
2260 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2261 prop = prev;
2262
 2263 /* If there is only one adjacent predecessor, don't propagate
2264 outside this loop. This loop_depth test isn't perfect, but
2265 I'm not sure the loop_father member is valid at this point. */
2266 else if (prev && single_pred_p (bb)
2267 && prev->loop_depth == bb->loop_depth)
2268 prop = prev;
2269
2270 /* If this is the JOIN block of a simple IF-THEN then
80777cd8 2271 propagate the hint to the HEADER block. */
644459d0 2272 else if (prev && prev2
2273 && EDGE_COUNT (bb->preds) == 2
2274 && EDGE_COUNT (prev->preds) == 1
2275 && EDGE_PRED (prev, 0)->src == prev2
2276 && prev2->loop_depth == bb->loop_depth
2277 && GET_CODE (branch_target) != REG)
2278 prop = prev;
2279
2280 /* Don't propagate when:
2281 - this is a simple loop and the hint would be too far
2282 - this is not a simple loop and there are 16 insns in
2283 this block already
2284 - the predecessor block ends in a branch that will be
2285 hinted
2286 - the predecessor block ends in an insn that invalidates
2287 the hint */
2288 if (prop
2289 && prop->index >= 0
2290 && (bbend = BB_END (prop))
2291 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2292 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2293 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2294 {
2295 if (dump_file)
2296 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2297 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2298 bb->index, prop->index, bb->loop_depth,
2299 INSN_UID (branch), loop_exit, simple_loop,
2300 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2301
2302 spu_bb_info[prop->index].prop_jump = branch;
2303 spu_bb_info[prop->index].bb = bb;
2304 }
2305 else if (next && branch_addr - next_addr >= spu_hint_dist)
2306 {
2307 if (dump_file)
2308 fprintf (dump_file, "hint for %i in block %i before %i\n",
2309 INSN_UID (branch), bb->index, INSN_UID (next));
2310 spu_emit_branch_hint (next, branch, branch_target,
2311 branch_addr - next_addr);
2312 }
2313 branch = 0;
2314 }
2315 }
2316 free (spu_bb_info);
2317}
2318\f
2319/* Emit a nop for INSN such that the two will dual issue. This assumes
2320 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2321 We check for TImode to handle a MULTI1 insn which has dual issued its
2322 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2323 ADDR_VEC insns. */
2324static void
2325emit_nop_for_insn (rtx insn)
2326{
2327 int p;
2328 rtx new_insn;
2329 p = get_pipe (insn);
2330 if (p == 1 && GET_MODE (insn) == TImode)
2331 {
2332 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2333 PUT_MODE (new_insn, TImode);
2334 PUT_MODE (insn, VOIDmode);
2335 }
2336 else
2337 new_insn = emit_insn_after (gen_lnop (), insn);
2338}
2339
2340/* Insert nops in basic blocks to meet dual issue alignment
2341 requirements. */
2342static void
2343insert_nops (void)
2344{
2345 rtx insn, next_insn, prev_insn;
2346 int length;
2347 int addr;
2348
2349 /* This sets up INSN_ADDRESSES. */
2350 shorten_branches (get_insns ());
2351
2352 /* Keep track of length added by nops. */
2353 length = 0;
2354
2355 prev_insn = 0;
2356 for (insn = get_insns (); insn; insn = next_insn)
2357 {
2358 next_insn = next_active_insn (insn);
2359 addr = INSN_ADDRESSES (INSN_UID (insn));
2360 if (GET_MODE (insn) == TImode
2361 && next_insn
2362 && GET_MODE (next_insn) != TImode
2363 && ((addr + length) & 7) != 0)
2364 {
2365 /* prev_insn will always be set because the first insn is
2366 always 8-byte aligned. */
2367 emit_nop_for_insn (prev_insn);
2368 length += 4;
2369 }
2370 prev_insn = insn;
2371 }
2372}
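
/* Illustrative sketch (not part of the original spu.c): the condition
   insert_nops tests, restated.  Instructions are 4 bytes and a
   dual-issue pair must start on an 8-byte boundary, so a cycle-start
   (TImode-marked) insn whose adjusted address is not a multiple of 8
   needs a nop inserted in front of it.  Made-up name, guarded out of
   the build.  */
#if 0
static int
example_needs_alignment_nop (int insn_addr, int nop_bytes_added_so_far)
{
  return ((insn_addr + nop_bytes_added_so_far) & 7) != 0;
}
#endif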
2373
2374static void
2375spu_machine_dependent_reorg (void)
2376{
2377 if (optimize > 0)
2378 {
2379 if (TARGET_BRANCH_HINTS)
2380 insert_branch_hints ();
2381 insert_nops ();
2382 }
2383}
2384\f
2385
2386/* Insn scheduling routines, primarily for dual issue. */
2387static int
2388spu_sched_issue_rate (void)
2389{
2390 return 2;
2391}
2392
2393static int
2394spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2395 int verbose ATTRIBUTE_UNUSED, rtx insn,
2396 int can_issue_more)
2397{
2398 if (GET_CODE (PATTERN (insn)) != USE
2399 && GET_CODE (PATTERN (insn)) != CLOBBER
2400 && get_pipe (insn) != -2)
2401 can_issue_more--;
2402 return can_issue_more;
2403}
2404
2405static int
2406get_pipe (rtx insn)
2407{
2408 enum attr_type t;
2409 /* Handle inline asm */
2410 if (INSN_CODE (insn) == -1)
2411 return -1;
2412 t = get_attr_type (insn);
2413 switch (t)
2414 {
2415 case TYPE_CONVERT:
2416 return -2;
2417 case TYPE_MULTI0:
2418 return -1;
2419
2420 case TYPE_FX2:
2421 case TYPE_FX3:
2422 case TYPE_SPR:
2423 case TYPE_NOP:
2424 case TYPE_FXB:
2425 case TYPE_FPD:
2426 case TYPE_FP6:
2427 case TYPE_FP7:
2428 case TYPE_IPREFETCH:
2429 return 0;
2430
2431 case TYPE_LNOP:
2432 case TYPE_SHUF:
2433 case TYPE_LOAD:
2434 case TYPE_STORE:
2435 case TYPE_BR:
2436 case TYPE_MULTI1:
2437 case TYPE_HBR:
2438 return 1;
2439 default:
2440 abort ();
2441 }
2442}
2443
2444static int
2445spu_sched_adjust_priority (rtx insn, int pri)
2446{
2447 int p = get_pipe (insn);
2448 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2449 * scheduling. */
2450 if (GET_CODE (PATTERN (insn)) == USE
2451 || GET_CODE (PATTERN (insn)) == CLOBBER
2452 || p == -2)
2453 return pri + 100;
2454 /* Schedule pipe0 insns early for greedier dual issue. */
2455 if (p != 1)
2456 return pri + 50;
2457 return pri;
2458}
2459
2460/* INSN is dependent on DEP_INSN. */
2461static int
2462spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2463 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2464{
2465 if (GET_CODE (insn) == CALL_INSN)
2466 return cost - 2;
2467 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2468 scheduler makes every insn in a block anti-dependent on the final
2469 jump_insn. We adjust here so higher cost insns will get scheduled
2470 earlier. */
2471 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 2472 return insn_cost (dep_insn) - 3;
644459d0 2473 return cost;
2474}
2475\f
2476/* Create a CONST_DOUBLE from a string. */
2477struct rtx_def *
2478spu_float_const (const char *string, enum machine_mode mode)
2479{
2480 REAL_VALUE_TYPE value;
2481 value = REAL_VALUE_ATOF (string, mode);
2482 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2483}
2484
644459d0 2485int
2486spu_constant_address_p (rtx x)
2487{
2488 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2489 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2490 || GET_CODE (x) == HIGH);
2491}
2492
2493static enum spu_immediate
2494which_immediate_load (HOST_WIDE_INT val)
2495{
2496 gcc_assert (val == trunc_int_for_mode (val, SImode));
2497
2498 if (val >= -0x8000 && val <= 0x7fff)
2499 return SPU_IL;
2500 if (val >= 0 && val <= 0x3ffff)
2501 return SPU_ILA;
2502 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2503 return SPU_ILH;
2504 if ((val & 0xffff) == 0)
2505 return SPU_ILHU;
2506
2507 return SPU_NONE;
2508}
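
/* Illustrative sketch (not part of the original spu.c): the same range
   tests on plain 32-bit values, with a concrete example for each
   class.  Made-up name, guarded out of the build.  */
#if 0
#include <stdint.h>

static const char *
example_il_class (int32_t val)
{
  if (val >= -0x8000 && val <= 0x7fff)
    return "il";     /* e.g. 0x1234: fits the signed 16-bit immediate */
  if (val >= 0 && val <= 0x3ffff)
    return "ila";    /* e.g. 0x23456: fits the unsigned 18-bit immediate */
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return "ilh";    /* e.g. 0x12341234: both halfwords identical */
  if ((val & 0xffff) == 0)
    return "ilhu";   /* e.g. 0x5678 << 16: low halfword is zero */
  return "none";     /* e.g. 0x12345678: needs ilhu followed by iohl */
}
#endif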
2509
dea01258 2510/* Return true when OP can be loaded by one of the il instructions, or
2511 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 2512int
2513immediate_load_p (rtx op, enum machine_mode mode)
dea01258 2514{
2515 if (CONSTANT_P (op))
2516 {
2517 enum immediate_class c = classify_immediate (op, mode);
5df189be 2518 return c == IC_IL1 || c == IC_IL1s
3072d30e 2519 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 2520 }
2521 return 0;
2522}
2523
2524/* Return true if the first SIZE bytes of arr is a constant that can be
2525 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2526 represent the size and offset of the instruction to use. */
2527static int
2528cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2529{
2530 int cpat, run, i, start;
2531 cpat = 1;
2532 run = 0;
2533 start = -1;
2534 for (i = 0; i < size && cpat; i++)
2535 if (arr[i] != i+16)
2536 {
2537 if (!run)
2538 {
2539 start = i;
2540 if (arr[i] == 3)
2541 run = 1;
2542 else if (arr[i] == 2 && arr[i+1] == 3)
2543 run = 2;
2544 else if (arr[i] == 0)
2545 {
2546 while (arr[i+run] == run && i+run < 16)
2547 run++;
2548 if (run != 4 && run != 8)
2549 cpat = 0;
2550 }
2551 else
2552 cpat = 0;
2553 if ((i & (run-1)) != 0)
2554 cpat = 0;
2555 i += run;
2556 }
2557 else
2558 cpat = 0;
2559 }
b01a6dc3 2560 if (cpat && (run || size < 16))
dea01258 2561 {
2562 if (run == 0)
2563 run = 1;
2564 if (prun)
2565 *prun = run;
2566 if (pstart)
2567 *pstart = start == -1 ? 16-run : start;
2568 return 1;
2569 }
2570 return 0;
2571}
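
/* Illustrative sketch (not part of the original spu.c): the pattern
   the scan above recognizes is the shuffle mask the c*d instructions
   produce -- the identity bytes 16..31 with a run of 0..run-1 spliced
   in at a naturally aligned offset.  The helper below builds the
   4-byte (cwd-style) variant; made-up name, guarded out of the
   build.  */
#if 0
static void
example_build_cwd_mask (unsigned char arr[16], int start)
{
  int i;
  for (i = 0; i < 16; i++)
    arr[i] = i + 16;       /* identity: keep the original quadword */
  for (i = 0; i < 4; i++)
    arr[start + i] = i;    /* splice in the 4 bytes being inserted */
  /* start == 4 gives { 16,17,18,19, 0,1,2,3, 24,...,31 }, which
     cpat_info above accepts with *prun == 4 and *pstart == 4.  */
}
#endif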
2572
2573/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 2574 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 2575static enum immediate_class
2576classify_immediate (rtx op, enum machine_mode mode)
644459d0 2577{
2578 HOST_WIDE_INT val;
2579 unsigned char arr[16];
5df189be 2580 int i, j, repeated, fsmbi, repeat;
dea01258 2581
2582 gcc_assert (CONSTANT_P (op));
2583
644459d0 2584 if (GET_MODE (op) != VOIDmode)
2585 mode = GET_MODE (op);
2586
dea01258 2587 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 2588 if (!flag_pic
2589 && mode == V4SImode
dea01258 2590 && GET_CODE (op) == CONST_VECTOR
2591 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2592 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2593 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2594 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2595 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2596 op = CONST_VECTOR_ELT (op, 0);
644459d0 2597
dea01258 2598 switch (GET_CODE (op))
2599 {
2600 case SYMBOL_REF:
2601 case LABEL_REF:
2602 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 2603
dea01258 2604 case CONST:
0cfc65d4 2605 /* We can never know if the resulting address fits in 18 bits and can be
2606 loaded with ila. For now, assume the address will not overflow if
2607 the displacement is "small" (fits 'K' constraint). */
2608 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2609 {
2610 rtx sym = XEXP (XEXP (op, 0), 0);
2611 rtx cst = XEXP (XEXP (op, 0), 1);
2612
2613 if (GET_CODE (sym) == SYMBOL_REF
2614 && GET_CODE (cst) == CONST_INT
2615 && satisfies_constraint_K (cst))
2616 return IC_IL1s;
2617 }
2618 return IC_IL2s;
644459d0 2619
dea01258 2620 case HIGH:
2621 return IC_IL1s;
2622
2623 case CONST_VECTOR:
2624 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2625 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2626 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2627 return IC_POOL;
2628 /* Fall through. */
2629
2630 case CONST_INT:
2631 case CONST_DOUBLE:
2632 constant_to_array (mode, op, arr);
644459d0 2633
dea01258 2634 /* Check that each 4-byte slot is identical. */
2635 repeated = 1;
2636 for (i = 4; i < 16; i += 4)
2637 for (j = 0; j < 4; j++)
2638 if (arr[j] != arr[i + j])
2639 repeated = 0;
2640
2641 if (repeated)
2642 {
2643 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2644 val = trunc_int_for_mode (val, SImode);
2645
2646 if (which_immediate_load (val) != SPU_NONE)
2647 return IC_IL1;
2648 }
2649
2650 /* Any mode of 2 bytes or smaller can be loaded with an il
2651 instruction. */
2652 gcc_assert (GET_MODE_SIZE (mode) > 2);
2653
2654 fsmbi = 1;
5df189be 2655 repeat = 0;
dea01258 2656 for (i = 0; i < 16 && fsmbi; i++)
5df189be 2657 if (arr[i] != 0 && repeat == 0)
2658 repeat = arr[i];
2659 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 2660 fsmbi = 0;
2661 if (fsmbi)
5df189be 2662 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 2663
2664 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2665 return IC_CPAT;
2666
2667 if (repeated)
2668 return IC_IL2;
2669
2670 return IC_POOL;
2671 default:
2672 break;
2673 }
2674 gcc_unreachable ();
644459d0 2675}
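
/* Illustrative sketch (not part of the original spu.c): the fsmbi test
   in classify_immediate accepts any constant whose bytes take at most
   one nonzero value.  When that value is 0xff a single fsmbi (one mask
   bit per result byte) builds it; any other repeated nonzero byte is
   classed IC_FSMBI2 and handled separately.  A stand-alone version of
   the same scan, with a made-up name and guarded out of the build:  */
#if 0
static int
example_fsmbi_class (const unsigned char arr[16])
{
  int i;
  unsigned char repeat = 0;
  for (i = 0; i < 16; i++)
    if (arr[i] != 0 && repeat == 0)
      repeat = arr[i];                /* remember the first nonzero byte */
    else if (arr[i] != 0 && arr[i] != repeat)
      return 0;                       /* two different nonzero bytes: no */
  return repeat == 0xff ? 1 : 2;      /* 1 ~ IC_FSMBI, 2 ~ IC_FSMBI2 */
}
#endif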
2676
2677static enum spu_immediate
2678which_logical_immediate (HOST_WIDE_INT val)
2679{
2680 gcc_assert (val == trunc_int_for_mode (val, SImode));
2681
2682 if (val >= -0x200 && val <= 0x1ff)
2683 return SPU_ORI;
2684 if (val >= 0 && val <= 0xffff)
2685 return SPU_IOHL;
2686 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2687 {
2688 val = trunc_int_for_mode (val, HImode);
2689 if (val >= -0x200 && val <= 0x1ff)
2690 return SPU_ORHI;
2691 if ((val & 0xff) == ((val >> 8) & 0xff))
2692 {
2693 val = trunc_int_for_mode (val, QImode);
2694 if (val >= -0x200 && val <= 0x1ff)
2695 return SPU_ORBI;
2696 }
2697 }
2698 return SPU_NONE;
2699}
2700
5df189be 2701/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2702 CONST_DOUBLEs. */
2703static int
2704const_vector_immediate_p (rtx x)
2705{
2706 int i;
2707 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2708 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2709 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2710 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2711 return 0;
2712 return 1;
2713}
2714
644459d0 2715int
2716logical_immediate_p (rtx op, enum machine_mode mode)
2717{
2718 HOST_WIDE_INT val;
2719 unsigned char arr[16];
2720 int i, j;
2721
2722 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2723 || GET_CODE (op) == CONST_VECTOR);
2724
5df189be 2725 if (GET_CODE (op) == CONST_VECTOR
2726 && !const_vector_immediate_p (op))
2727 return 0;
2728
644459d0 2729 if (GET_MODE (op) != VOIDmode)
2730 mode = GET_MODE (op);
2731
2732 constant_to_array (mode, op, arr);
2733
2734 /* Check that bytes are repeated. */
2735 for (i = 4; i < 16; i += 4)
2736 for (j = 0; j < 4; j++)
2737 if (arr[j] != arr[i + j])
2738 return 0;
2739
2740 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2741 val = trunc_int_for_mode (val, SImode);
2742
2743 i = which_logical_immediate (val);
2744 return i != SPU_NONE && i != SPU_IOHL;
2745}
2746
2747int
2748iohl_immediate_p (rtx op, enum machine_mode mode)
2749{
2750 HOST_WIDE_INT val;
2751 unsigned char arr[16];
2752 int i, j;
2753
2754 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2755 || GET_CODE (op) == CONST_VECTOR);
2756
5df189be 2757 if (GET_CODE (op) == CONST_VECTOR
2758 && !const_vector_immediate_p (op))
2759 return 0;
2760
644459d0 2761 if (GET_MODE (op) != VOIDmode)
2762 mode = GET_MODE (op);
2763
2764 constant_to_array (mode, op, arr);
2765
2766 /* Check that bytes are repeated. */
2767 for (i = 4; i < 16; i += 4)
2768 for (j = 0; j < 4; j++)
2769 if (arr[j] != arr[i + j])
2770 return 0;
2771
2772 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2773 val = trunc_int_for_mode (val, SImode);
2774
2775 return val >= 0 && val <= 0xffff;
2776}
2777
2778int
2779arith_immediate_p (rtx op, enum machine_mode mode,
2780 HOST_WIDE_INT low, HOST_WIDE_INT high)
2781{
2782 HOST_WIDE_INT val;
2783 unsigned char arr[16];
2784 int bytes, i, j;
2785
2786 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2787 || GET_CODE (op) == CONST_VECTOR);
2788
5df189be 2789 if (GET_CODE (op) == CONST_VECTOR
2790 && !const_vector_immediate_p (op))
2791 return 0;
2792
644459d0 2793 if (GET_MODE (op) != VOIDmode)
2794 mode = GET_MODE (op);
2795
2796 constant_to_array (mode, op, arr);
2797
2798 if (VECTOR_MODE_P (mode))
2799 mode = GET_MODE_INNER (mode);
2800
2801 bytes = GET_MODE_SIZE (mode);
2802 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2803
2804 /* Check that bytes are repeated. */
2805 for (i = bytes; i < 16; i += bytes)
2806 for (j = 0; j < bytes; j++)
2807 if (arr[j] != arr[i + j])
2808 return 0;
2809
2810 val = arr[0];
2811 for (j = 1; j < bytes; j++)
2812 val = (val << 8) | arr[j];
2813
2814 val = trunc_int_for_mode (val, mode);
2815
2816 return val >= low && val <= high;
2817}
2818
2819/* We accept:
5b865faf 2820 - any 32-bit constant (SImode, SFmode)
644459d0 2821 - any constant that can be generated with fsmbi (any mode)
5b865faf 2822 - a 64-bit constant where the high and low bits are identical
644459d0 2823 (DImode, DFmode)
5b865faf 2824 - a 128-bit constant where the four 32-bit words match. */
644459d0 2825int
2826spu_legitimate_constant_p (rtx x)
2827{
5df189be 2828 if (GET_CODE (x) == HIGH)
2829 x = XEXP (x, 0);
644459d0 2830 /* V4SI with all identical symbols is valid. */
5df189be 2831 if (!flag_pic
2832 && GET_MODE (x) == V4SImode
644459d0 2833 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2834 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 2835 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 2836 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2837 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2838 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2839
5df189be 2840 if (GET_CODE (x) == CONST_VECTOR
2841 && !const_vector_immediate_p (x))
2842 return 0;
644459d0 2843 return 1;
2844}
2845
2846/* Valid addresses are:
2847 - symbol_ref, label_ref, const
2848 - reg
2849 - reg + const, where either reg or const is 16 byte aligned
2850 - reg + reg, alignment doesn't matter
2851 The alignment matters in the reg+const case because lqd and stqd
2852 ignore the 4 least significant bits of the const. (TODO: It might be
2853 preferable to allow any alignment and fix it up when splitting.) */
2854int
2855spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2856 rtx x, int reg_ok_strict)
2857{
2858 if (mode == TImode && GET_CODE (x) == AND
2859 && GET_CODE (XEXP (x, 1)) == CONST_INT
2860 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2861 x = XEXP (x, 0);
2862 switch (GET_CODE (x))
2863 {
2864 case SYMBOL_REF:
2865 case LABEL_REF:
2866 return !TARGET_LARGE_MEM;
2867
2868 case CONST:
0cfc65d4 2869 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2870 {
2871 rtx sym = XEXP (XEXP (x, 0), 0);
2872 rtx cst = XEXP (XEXP (x, 0), 1);
2873
2874 /* Accept any symbol_ref + constant, assuming it does not
2875 wrap around the local store addressability limit. */
2876 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2877 return 1;
2878 }
2879 return 0;
644459d0 2880
2881 case CONST_INT:
2882 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2883
2884 case SUBREG:
2885 x = XEXP (x, 0);
2886 gcc_assert (GET_CODE (x) == REG);
 2887 /* Fall through. */
2888 case REG:
2889 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2890
2891 case PLUS:
2892 case LO_SUM:
2893 {
2894 rtx op0 = XEXP (x, 0);
2895 rtx op1 = XEXP (x, 1);
2896 if (GET_CODE (op0) == SUBREG)
2897 op0 = XEXP (op0, 0);
2898 if (GET_CODE (op1) == SUBREG)
2899 op1 = XEXP (op1, 0);
2900 /* We can't just accept any aligned register because CSE can
2901 change it to a register that is not marked aligned and then
2902 recog will fail. So we only accept frame registers because
2903 they will only be changed to other frame registers. */
2904 if (GET_CODE (op0) == REG
2905 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2906 && GET_CODE (op1) == CONST_INT
2907 && INTVAL (op1) >= -0x2000
2908 && INTVAL (op1) <= 0x1fff
5df189be 2909 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 2910 return 1;
2911 if (GET_CODE (op0) == REG
2912 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2913 && GET_CODE (op1) == REG
2914 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2915 return 1;
2916 }
2917 break;
2918
2919 default:
2920 break;
2921 }
2922 return 0;
2923}
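
/* Illustrative sketch (not part of the original spu.c): why the
   alignment requirement on the reg + const form matters.  The d-form
   loads and stores drop the low four bits of the displacement, so an
   unaligned constant silently addresses the enclosing quadword rather
   than the requested byte.  Made-up name, guarded out of the build.  */
#if 0
static unsigned int
example_lqd_effective_disp (unsigned int disp)
{
  /* e.g. disp == 0x12 yields 0x10: the access covers the containing
     16-byte block, not bytes 0x12..0x21.  */
  return disp & ~15u;
}
#endif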
2924
2925/* When the address is reg + const_int, force the const_int into a
fa7637bd 2926 register. */
644459d0 2927rtx
2928spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2929 enum machine_mode mode)
2930{
2931 rtx op0, op1;
2932 /* Make sure both operands are registers. */
2933 if (GET_CODE (x) == PLUS)
2934 {
2935 op0 = XEXP (x, 0);
2936 op1 = XEXP (x, 1);
2937 if (ALIGNED_SYMBOL_REF_P (op0))
2938 {
2939 op0 = force_reg (Pmode, op0);
2940 mark_reg_pointer (op0, 128);
2941 }
2942 else if (GET_CODE (op0) != REG)
2943 op0 = force_reg (Pmode, op0);
2944 if (ALIGNED_SYMBOL_REF_P (op1))
2945 {
2946 op1 = force_reg (Pmode, op1);
2947 mark_reg_pointer (op1, 128);
2948 }
2949 else if (GET_CODE (op1) != REG)
2950 op1 = force_reg (Pmode, op1);
2951 x = gen_rtx_PLUS (Pmode, op0, op1);
2952 if (spu_legitimate_address (mode, x, 0))
2953 return x;
2954 }
2955 return NULL_RTX;
2956}
2957
2958/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2959 struct attribute_spec.handler. */
2960static tree
2961spu_handle_fndecl_attribute (tree * node,
2962 tree name,
2963 tree args ATTRIBUTE_UNUSED,
2964 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2965{
2966 if (TREE_CODE (*node) != FUNCTION_DECL)
2967 {
2968 warning (0, "`%s' attribute only applies to functions",
2969 IDENTIFIER_POINTER (name));
2970 *no_add_attrs = true;
2971 }
2972
2973 return NULL_TREE;
2974}
2975
2976/* Handle the "vector" attribute. */
2977static tree
2978spu_handle_vector_attribute (tree * node, tree name,
2979 tree args ATTRIBUTE_UNUSED,
2980 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2981{
2982 tree type = *node, result = NULL_TREE;
2983 enum machine_mode mode;
2984 int unsigned_p;
2985
2986 while (POINTER_TYPE_P (type)
2987 || TREE_CODE (type) == FUNCTION_TYPE
2988 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2989 type = TREE_TYPE (type);
2990
2991 mode = TYPE_MODE (type);
2992
2993 unsigned_p = TYPE_UNSIGNED (type);
2994 switch (mode)
2995 {
2996 case DImode:
2997 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2998 break;
2999 case SImode:
3000 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3001 break;
3002 case HImode:
3003 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3004 break;
3005 case QImode:
3006 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3007 break;
3008 case SFmode:
3009 result = V4SF_type_node;
3010 break;
3011 case DFmode:
3012 result = V2DF_type_node;
3013 break;
3014 default:
3015 break;
3016 }
3017
3018 /* Propagate qualifiers attached to the element type
3019 onto the vector type. */
3020 if (result && result != type && TYPE_QUALS (type))
3021 result = build_qualified_type (result, TYPE_QUALS (type));
3022
3023 *no_add_attrs = true; /* No need to hang on to the attribute. */
3024
3025 if (!result)
3026 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3027 else
d991e6e8 3028 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3029
3030 return NULL_TREE;
3031}
3032
f2b32076 3033/* Return nonzero if FUNC is a naked function. */
644459d0 3034static int
3035spu_naked_function_p (tree func)
3036{
3037 tree a;
3038
3039 if (TREE_CODE (func) != FUNCTION_DECL)
3040 abort ();
3041
3042 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3043 return a != NULL_TREE;
3044}
3045
3046int
3047spu_initial_elimination_offset (int from, int to)
3048{
3049 int saved_regs_size = spu_saved_regs_size ();
3050 int sp_offset = 0;
abe32cce 3051 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3052 || get_frame_size () || saved_regs_size)
3053 sp_offset = STACK_POINTER_OFFSET;
3054 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3055 return (sp_offset + crtl->outgoing_args_size);
644459d0 3056 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3057 return 0;
3058 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3059 return sp_offset + crtl->outgoing_args_size
644459d0 3060 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3061 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3062 return get_frame_size () + saved_regs_size + sp_offset;
3063 return 0;
3064}
3065
3066rtx
fb80456a 3067spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3068{
3069 enum machine_mode mode = TYPE_MODE (type);
3070 int byte_size = ((mode == BLKmode)
3071 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3072
3073 /* Make sure small structs are left justified in a register. */
3074 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3075 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3076 {
3077 enum machine_mode smode;
3078 rtvec v;
3079 int i;
3080 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3081 int n = byte_size / UNITS_PER_WORD;
3082 v = rtvec_alloc (nregs);
3083 for (i = 0; i < n; i++)
3084 {
3085 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3086 gen_rtx_REG (TImode,
3087 FIRST_RETURN_REGNUM
3088 + i),
3089 GEN_INT (UNITS_PER_WORD * i));
3090 byte_size -= UNITS_PER_WORD;
3091 }
3092
3093 if (n < nregs)
3094 {
3095 if (byte_size < 4)
3096 byte_size = 4;
3097 smode =
3098 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3099 RTVEC_ELT (v, n) =
3100 gen_rtx_EXPR_LIST (VOIDmode,
3101 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3102 GEN_INT (UNITS_PER_WORD * n));
3103 }
3104 return gen_rtx_PARALLEL (mode, v);
3105 }
3106 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3107}
3108
3109rtx
3110spu_function_arg (CUMULATIVE_ARGS cum,
3111 enum machine_mode mode,
3112 tree type, int named ATTRIBUTE_UNUSED)
3113{
3114 int byte_size;
3115
3116 if (cum >= MAX_REGISTER_ARGS)
3117 return 0;
3118
3119 byte_size = ((mode == BLKmode)
3120 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3121
3122 /* The ABI does not allow parameters to be passed partially in
 3123 a register and partially on the stack. */
3124 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3125 return 0;
3126
3127 /* Make sure small structs are left justified in a register. */
3128 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3129 && byte_size < UNITS_PER_WORD && byte_size > 0)
3130 {
3131 enum machine_mode smode;
3132 rtx gr_reg;
3133 if (byte_size < 4)
3134 byte_size = 4;
3135 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3136 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3137 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3138 const0_rtx);
3139 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3140 }
3141 else
3142 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3143}
3144
3145/* Variable sized types are passed by reference. */
3146static bool
3147spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3148 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3149 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3150{
3151 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3152}
3153\f
3154
3155/* Var args. */
3156
3157/* Create and return the va_list datatype.
3158
3159 On SPU, va_list is an array type equivalent to
3160
3161 typedef struct __va_list_tag
3162 {
3163 void *__args __attribute__((__aligned(16)));
3164 void *__skip __attribute__((__aligned(16)));
3165
3166 } va_list[1];
3167
fa7637bd 3168 where __args points to the arg that will be returned by the next
644459d0 3169 va_arg(), and __skip points to the previous stack frame such that
3170 when __args == __skip we should advance __args by 32 bytes. */
3171static tree
3172spu_build_builtin_va_list (void)
3173{
3174 tree f_args, f_skip, record, type_decl;
3175 bool owp;
3176
3177 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3178
3179 type_decl =
3180 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3181
3182 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3183 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3184
3185 DECL_FIELD_CONTEXT (f_args) = record;
3186 DECL_ALIGN (f_args) = 128;
3187 DECL_USER_ALIGN (f_args) = 1;
3188
3189 DECL_FIELD_CONTEXT (f_skip) = record;
3190 DECL_ALIGN (f_skip) = 128;
3191 DECL_USER_ALIGN (f_skip) = 1;
3192
3193 TREE_CHAIN (record) = type_decl;
3194 TYPE_NAME (record) = type_decl;
3195 TYPE_FIELDS (record) = f_args;
3196 TREE_CHAIN (f_args) = f_skip;
3197
 3198 /* We know this is being padded and we want it to be. It is an internal
 3199 type, so hide the warnings from the user. */
3200 owp = warn_padded;
3201 warn_padded = false;
3202
3203 layout_type (record);
3204
3205 warn_padded = owp;
3206
3207 /* The correct type is an array type of one element. */
3208 return build_array_type (record, build_index_type (size_zero_node));
3209}
3210
3211/* Implement va_start by filling the va_list structure VALIST.
3212 NEXTARG points to the first anonymous stack argument.
3213
3214 The following global variables are used to initialize
3215 the va_list structure:
3216
abe32cce 3217 crtl->args.info;
644459d0 3218 the CUMULATIVE_ARGS for this function
3219
abe32cce 3220 crtl->args.arg_offset_rtx:
644459d0 3221 holds the offset of the first anonymous stack argument
3222 (relative to the virtual arg pointer). */
3223
8a58ed0a 3224static void
644459d0 3225spu_va_start (tree valist, rtx nextarg)
3226{
3227 tree f_args, f_skip;
3228 tree args, skip, t;
3229
3230 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3231 f_skip = TREE_CHAIN (f_args);
3232
3233 valist = build_va_arg_indirect_ref (valist);
3234 args =
3235 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3236 skip =
3237 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3238
3239 /* Find the __args area. */
3240 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3241 if (crtl->args.pretend_args_size > 0)
0de36bdb 3242 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3243 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3244 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3245 TREE_SIDE_EFFECTS (t) = 1;
3246 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3247
3248 /* Find the __skip area. */
3249 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3250 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3251 size_int (crtl->args.pretend_args_size
0de36bdb 3252 - STACK_POINTER_OFFSET));
75a70cf9 3253 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3254 TREE_SIDE_EFFECTS (t) = 1;
3255 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3256}
3257
3258/* Gimplify va_arg by updating the va_list structure
3259 VALIST as required to retrieve an argument of type
3260 TYPE, and returning that argument.
3261
3262 ret = va_arg(VALIST, TYPE);
3263
3264 generates code equivalent to:
3265
3266 paddedsize = (sizeof(TYPE) + 15) & -16;
3267 if (VALIST.__args + paddedsize > VALIST.__skip
3268 && VALIST.__args <= VALIST.__skip)
3269 addr = VALIST.__skip + 32;
3270 else
3271 addr = VALIST.__args;
3272 VALIST.__args = addr + paddedsize;
3273 ret = *(TYPE *)addr;
3274 */
3275static tree
75a70cf9 3276spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3277 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3278{
3279 tree f_args, f_skip;
3280 tree args, skip;
3281 HOST_WIDE_INT size, rsize;
3282 tree paddedsize, addr, tmp;
3283 bool pass_by_reference_p;
3284
3285 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3286 f_skip = TREE_CHAIN (f_args);
3287
3288 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3289 args =
3290 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3291 skip =
3292 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3293
3294 addr = create_tmp_var (ptr_type_node, "va_arg");
3295 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3296
 3297 /* If an object is dynamically sized, a pointer to it is passed
3298 instead of the object itself. */
3299 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3300 false);
3301 if (pass_by_reference_p)
3302 type = build_pointer_type (type);
3303 size = int_size_in_bytes (type);
3304 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3305
3306 /* build conditional expression to calculate addr. The expression
3307 will be gimplified later. */
0de36bdb 3308 paddedsize = size_int (rsize);
75a70cf9 3309 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3310 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3311 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3312 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3313 unshare_expr (skip)));
644459d0 3314
3315 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3316 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3317 size_int (32)), unshare_expr (args));
644459d0 3318
75a70cf9 3319 gimplify_assign (addr, tmp, pre_p);
644459d0 3320
3321 /* update VALIST.__args */
0de36bdb 3322 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3323 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3324
3325 addr = fold_convert (build_pointer_type (type), addr);
3326
3327 if (pass_by_reference_p)
3328 addr = build_va_arg_indirect_ref (addr);
3329
3330 return build_va_arg_indirect_ref (addr);
3331}
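
/* Illustrative sketch (not part of the original spu.c): the address
   computation described in the comment before spu_gimplify_va_arg_expr,
   written out over plain pointers.  ARGS and SKIP stand for the two
   va_list fields; the name is made up and the code is guarded out of
   the build.  */
#if 0
static void *
example_spu_va_arg_addr (char **args, char *skip, unsigned long size)
{
  unsigned long padded = (size + 15) & ~15ul;
  char *addr;

  /* If the padded argument would cross the __skip boundary, jump
     forward 32 bytes into the previous frame, as the va_list comment
     above describes.  */
  if (*args + padded > skip && *args <= skip)
    addr = skip + 32;
  else
    addr = *args;
  *args = addr + padded;
  return addr;
}
#endif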
3332
3333/* Save parameter registers starting with the register that corresponds
 3334 to the first unnamed parameter. If the first unnamed parameter is
 3335 on the stack then save no registers. Set pretend_args_size to the
3336 amount of space needed to save the registers. */
3337void
3338spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3339 tree type, int *pretend_size, int no_rtl)
3340{
3341 if (!no_rtl)
3342 {
3343 rtx tmp;
3344 int regno;
3345 int offset;
3346 int ncum = *cum;
3347
3348 /* cum currently points to the last named argument, we want to
3349 start at the next argument. */
3350 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3351
3352 offset = -STACK_POINTER_OFFSET;
3353 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3354 {
3355 tmp = gen_frame_mem (V4SImode,
3356 plus_constant (virtual_incoming_args_rtx,
3357 offset));
3358 emit_move_insn (tmp,
3359 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3360 offset += 16;
3361 }
3362 *pretend_size = offset + STACK_POINTER_OFFSET;
3363 }
3364}
3365\f
3366void
3367spu_conditional_register_usage (void)
3368{
3369 if (flag_pic)
3370 {
3371 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3372 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3373 }
644459d0 3374}
3375
3376/* This is called to decide when we can simplify a load instruction. We
3377 must only return true for registers which we know will always be
 3378 aligned, taking into account that CSE might replace this reg with
3379 another one that has not been marked aligned.
3380 So this is really only true for frame, stack and virtual registers,
fa7637bd 3381 which we know are always aligned and should not be adversely affected
3382 by CSE. */
644459d0 3383static int
3384regno_aligned_for_load (int regno)
3385{
3386 return regno == FRAME_POINTER_REGNUM
5df189be 3387 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 3388 || regno == ARG_POINTER_REGNUM
644459d0 3389 || regno == STACK_POINTER_REGNUM
5df189be 3390 || (regno >= FIRST_VIRTUAL_REGISTER
3391 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 3392}
3393
3394/* Return TRUE when mem is known to be 16-byte aligned. */
3395int
3396aligned_mem_p (rtx mem)
3397{
3398 if (MEM_ALIGN (mem) >= 128)
3399 return 1;
3400 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3401 return 1;
3402 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3403 {
3404 rtx p0 = XEXP (XEXP (mem, 0), 0);
3405 rtx p1 = XEXP (XEXP (mem, 0), 1);
3406 if (regno_aligned_for_load (REGNO (p0)))
3407 {
3408 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3409 return 1;
3410 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3411 return 1;
3412 }
3413 }
3414 else if (GET_CODE (XEXP (mem, 0)) == REG)
3415 {
3416 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3417 return 1;
3418 }
3419 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3420 return 1;
3421 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3422 {
3423 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3424 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3425 if (GET_CODE (p0) == SYMBOL_REF
3426 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3427 return 1;
3428 }
3429 return 0;
3430}
3431
69ced2d6 3432/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3433 into its SYMBOL_REF_FLAGS. */
3434static void
3435spu_encode_section_info (tree decl, rtx rtl, int first)
3436{
3437 default_encode_section_info (decl, rtl, first);
3438
3439 /* If a variable has a forced alignment to < 16 bytes, mark it with
3440 SYMBOL_FLAG_ALIGN1. */
3441 if (TREE_CODE (decl) == VAR_DECL
3442 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3443 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3444}
3445
644459d0 3446/* Return TRUE if we are certain the mem refers to a complete object
3447 which is both 16-byte aligned and padded to a 16-byte boundary. This
3448 would make it safe to store with a single instruction.
3449 We guarantee the alignment and padding for static objects by aligning
 3450 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3451 FIXME: We currently cannot guarantee this for objects on the stack
3452 because assign_parm_setup_stack calls assign_stack_local with the
3453 alignment of the parameter mode and in that case the alignment never
3454 gets adjusted by LOCAL_ALIGNMENT. */
3455static int
3456store_with_one_insn_p (rtx mem)
3457{
3458 rtx addr = XEXP (mem, 0);
3459 if (GET_MODE (mem) == BLKmode)
3460 return 0;
3461 /* Only static objects. */
3462 if (GET_CODE (addr) == SYMBOL_REF)
3463 {
3464 /* We use the associated declaration to make sure the access is
fa7637bd 3465 referring to the whole object.
644459d0 3466 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3467 if it is necessary. Will there be cases where one exists, and
3468 the other does not? Will there be cases where both exist, but
3469 have different types? */
3470 tree decl = MEM_EXPR (mem);
3471 if (decl
3472 && TREE_CODE (decl) == VAR_DECL
3473 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3474 return 1;
3475 decl = SYMBOL_REF_DECL (addr);
3476 if (decl
3477 && TREE_CODE (decl) == VAR_DECL
3478 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3479 return 1;
3480 }
3481 return 0;
3482}
3483
3484int
3485spu_expand_mov (rtx * ops, enum machine_mode mode)
3486{
3487 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3488 abort ();
3489
3490 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3491 {
3492 rtx from = SUBREG_REG (ops[1]);
3493 enum machine_mode imode = GET_MODE (from);
3494
3495 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3496 && GET_MODE_CLASS (imode) == MODE_INT
3497 && subreg_lowpart_p (ops[1]));
3498
3499 if (GET_MODE_SIZE (imode) < 4)
3500 {
3501 from = gen_rtx_SUBREG (SImode, from, 0);
3502 imode = SImode;
3503 }
3504
3505 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3506 {
99bdde56 3507 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 3508 emit_insn (GEN_FCN (icode) (ops[0], from));
3509 }
3510 else
3511 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3512 return 1;
3513 }
3514
3515 /* At least one of the operands needs to be a register. */
3516 if ((reload_in_progress | reload_completed) == 0
3517 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3518 {
3519 rtx temp = force_reg (mode, ops[1]);
3520 emit_move_insn (ops[0], temp);
3521 return 1;
3522 }
3523 if (reload_in_progress || reload_completed)
3524 {
dea01258 3525 if (CONSTANT_P (ops[1]))
3526 return spu_split_immediate (ops);
644459d0 3527 return 0;
3528 }
3529 else
3530 {
3531 if (GET_CODE (ops[0]) == MEM)
3532 {
3533 if (!spu_valid_move (ops))
3534 {
3535 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3536 gen_reg_rtx (TImode)));
3537 return 1;
3538 }
3539 }
3540 else if (GET_CODE (ops[1]) == MEM)
3541 {
3542 if (!spu_valid_move (ops))
3543 {
3544 emit_insn (gen_load
3545 (ops[0], ops[1], gen_reg_rtx (TImode),
3546 gen_reg_rtx (SImode)));
3547 return 1;
3548 }
3549 }
3550 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3551 extend them. */
3552 if (GET_CODE (ops[1]) == CONST_INT)
3553 {
3554 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3555 if (val != INTVAL (ops[1]))
3556 {
3557 emit_move_insn (ops[0], GEN_INT (val));
3558 return 1;
3559 }
3560 }
3561 }
3562 return 0;
3563}
3564
644459d0 3565void
3566spu_split_load (rtx * ops)
3567{
3568 enum machine_mode mode = GET_MODE (ops[0]);
3569 rtx addr, load, rot, mem, p0, p1;
3570 int rot_amt;
3571
3572 addr = XEXP (ops[1], 0);
3573
3574 rot = 0;
3575 rot_amt = 0;
3576 if (GET_CODE (addr) == PLUS)
3577 {
3578 /* 8 cases:
3579 aligned reg + aligned reg => lqx
3580 aligned reg + unaligned reg => lqx, rotqby
3581 aligned reg + aligned const => lqd
3582 aligned reg + unaligned const => lqd, rotqbyi
3583 unaligned reg + aligned reg => lqx, rotqby
3584 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3585 unaligned reg + aligned const => lqd, rotqby
3586 unaligned reg + unaligned const -> not allowed by legitimate address
3587 */
3588 p0 = XEXP (addr, 0);
3589 p1 = XEXP (addr, 1);
aa71ecd4 3590 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 3591 {
aa71ecd4 3592 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3593 {
3594 emit_insn (gen_addsi3 (ops[3], p0, p1));
3595 rot = ops[3];
3596 }
3597 else
3598 rot = p0;
3599 }
3600 else
3601 {
3602 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3603 {
3604 rot_amt = INTVAL (p1) & 15;
3605 p1 = GEN_INT (INTVAL (p1) & -16);
3606 addr = gen_rtx_PLUS (SImode, p0, p1);
3607 }
aa71ecd4 3608 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3609 rot = p1;
3610 }
3611 }
3612 else if (GET_CODE (addr) == REG)
3613 {
aa71ecd4 3614 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 3615 rot = addr;
3616 }
3617 else if (GET_CODE (addr) == CONST)
3618 {
3619 if (GET_CODE (XEXP (addr, 0)) == PLUS
3620 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3621 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3622 {
3623 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3624 if (rot_amt & -16)
3625 addr = gen_rtx_CONST (Pmode,
3626 gen_rtx_PLUS (Pmode,
3627 XEXP (XEXP (addr, 0), 0),
3628 GEN_INT (rot_amt & -16)));
3629 else
3630 addr = XEXP (XEXP (addr, 0), 0);
3631 }
3632 else
3633 rot = addr;
3634 }
3635 else if (GET_CODE (addr) == CONST_INT)
3636 {
3637 rot_amt = INTVAL (addr);
3638 addr = GEN_INT (rot_amt & -16);
3639 }
3640 else if (!ALIGNED_SYMBOL_REF_P (addr))
3641 rot = addr;
3642
3643 if (GET_MODE_SIZE (mode) < 4)
3644 rot_amt += GET_MODE_SIZE (mode) - 4;
3645
3646 rot_amt &= 15;
3647
3648 if (rot && rot_amt)
3649 {
3650 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3651 rot = ops[3];
3652 rot_amt = 0;
3653 }
3654
3655 load = ops[2];
3656
3657 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3658 mem = change_address (ops[1], TImode, addr);
3659
e04cf423 3660 emit_insn (gen_movti (load, mem));
644459d0 3661
3662 if (rot)
3663 emit_insn (gen_rotqby_ti (load, load, rot));
3664 else if (rot_amt)
3665 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3666
3667 if (reload_completed)
3668 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3669 else
3670 emit_insn (gen_spu_convert (ops[0], load));
3671}
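
/* Illustrative sketch (not part of the original spu.c): the essence of
   the split above in portable C -- fetch the aligned quadword that
   contains the value, then rotate it left by (addr & 15) bytes so the
   wanted bytes land at the start of the register (the preferred slot;
   sub-word scalars get a further adjustment in the code above).
   memcpy stands in for lqd/lqx and the loop for rotqby; made-up name,
   guarded out of the build.  */
#if 0
#include <string.h>

static void
example_unaligned_qword_load (unsigned char dst[16], const unsigned char *addr)
{
  const unsigned char *base
    = (const unsigned char *) ((unsigned long) addr & ~15ul);
  unsigned int rot = (unsigned long) addr & 15;
  unsigned char q[16];
  unsigned int i;

  memcpy (q, base, 16);             /* the load is always 16-byte aligned */
  for (i = 0; i < 16; i++)          /* rotate left by rot bytes */
    dst[i] = q[(i + rot) & 15];
}
#endif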
3672
3673void
3674spu_split_store (rtx * ops)
3675{
3676 enum machine_mode mode = GET_MODE (ops[0]);
3677 rtx pat = ops[2];
3678 rtx reg = ops[3];
3679 rtx addr, p0, p1, p1_lo, smem;
3680 int aform;
3681 int scalar;
3682
3683 addr = XEXP (ops[0], 0);
3684
3685 if (GET_CODE (addr) == PLUS)
3686 {
3687 /* 8 cases:
3688 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3689 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3690 aligned reg + aligned const => lqd, c?d, shuf, stqx
3691 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3692 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3693 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3694 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3695 unaligned reg + unaligned const -> not allowed by legitimate address
3696 */
3697 aform = 0;
3698 p0 = XEXP (addr, 0);
3699 p1 = p1_lo = XEXP (addr, 1);
3700 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3701 {
3702 p1_lo = GEN_INT (INTVAL (p1) & 15);
3703 p1 = GEN_INT (INTVAL (p1) & -16);
3704 addr = gen_rtx_PLUS (SImode, p0, p1);
3705 }
3706 }
3707 else if (GET_CODE (addr) == REG)
3708 {
3709 aform = 0;
3710 p0 = addr;
3711 p1 = p1_lo = const0_rtx;
3712 }
3713 else
3714 {
3715 aform = 1;
3716 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3717 p1 = 0; /* aform doesn't use p1 */
3718 p1_lo = addr;
3719 if (ALIGNED_SYMBOL_REF_P (addr))
3720 p1_lo = const0_rtx;
3721 else if (GET_CODE (addr) == CONST)
3722 {
3723 if (GET_CODE (XEXP (addr, 0)) == PLUS
3724 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3725 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3726 {
3727 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3728 if ((v & -16) != 0)
3729 addr = gen_rtx_CONST (Pmode,
3730 gen_rtx_PLUS (Pmode,
3731 XEXP (XEXP (addr, 0), 0),
3732 GEN_INT (v & -16)));
3733 else
3734 addr = XEXP (XEXP (addr, 0), 0);
3735 p1_lo = GEN_INT (v & 15);
3736 }
3737 }
3738 else if (GET_CODE (addr) == CONST_INT)
3739 {
3740 p1_lo = GEN_INT (INTVAL (addr) & 15);
3741 addr = GEN_INT (INTVAL (addr) & -16);
3742 }
3743 }
3744
e04cf423 3745 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3746
644459d0 3747 scalar = store_with_one_insn_p (ops[0]);
3748 if (!scalar)
3749 {
 3750 /* We could copy the flags from the ops[0] MEM to mem here.
3751 We don't because we want this load to be optimized away if
3752 possible, and copying the flags will prevent that in certain
3753 cases, e.g. consider the volatile flag. */
3754
e04cf423 3755 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3756 set_mem_alias_set (lmem, 0);
3757 emit_insn (gen_movti (reg, lmem));
644459d0 3758
aa71ecd4 3759 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 3760 p0 = stack_pointer_rtx;
3761 if (!p1_lo)
3762 p1_lo = const0_rtx;
3763
3764 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3765 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3766 }
3767 else if (reload_completed)
3768 {
3769 if (GET_CODE (ops[1]) == REG)
3770 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3771 else if (GET_CODE (ops[1]) == SUBREG)
3772 emit_move_insn (reg,
3773 gen_rtx_REG (GET_MODE (reg),
3774 REGNO (SUBREG_REG (ops[1]))));
3775 else
3776 abort ();
3777 }
3778 else
3779 {
3780 if (GET_CODE (ops[1]) == REG)
3781 emit_insn (gen_spu_convert (reg, ops[1]));
3782 else if (GET_CODE (ops[1]) == SUBREG)
3783 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3784 else
3785 abort ();
3786 }
3787
3788 if (GET_MODE_SIZE (mode) < 4 && scalar)
3789 emit_insn (gen_shlqby_ti
3790 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3791
644459d0 3792 smem = change_address (ops[0], TImode, addr);
3793 /* We can't use the previous alias set because the memory has changed
3794 size and can potentially overlap objects of other types. */
3795 set_mem_alias_set (smem, 0);
3796
e04cf423 3797 emit_insn (gen_movti (smem, reg));
644459d0 3798}
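/* Illustrative sketch, not part of the port: for a misaligned or scalar
   SImode store to 4(sp), the expansion above amounts to a read-modify-write
   quadword sequence along the lines of

       lqd    $t, 0($sp)           # load the enclosing quadword
       cwd    $pat, 4($sp)         # build the word-insertion control
       shufb  $t, $val, $t, $pat   # merge the new word into the quadword
       stqd   $t, 0($sp)           # store the quadword back

   where $t, $pat and $val are stand-ins for the registers chosen for
   reg, pat and ops[1].  */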
3799
3800/* Return TRUE if X is a MEM which is a struct member reference
3801   and the member can safely be loaded and stored with a single
3802   instruction because it is padded. */
3803static int
3804mem_is_padded_component_ref (rtx x)
3805{
3806 tree t = MEM_EXPR (x);
3807 tree r;
3808 if (!t || TREE_CODE (t) != COMPONENT_REF)
3809 return 0;
3810 t = TREE_OPERAND (t, 1);
3811 if (!t || TREE_CODE (t) != FIELD_DECL
3812 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3813 return 0;
3814 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3815 r = DECL_FIELD_CONTEXT (t);
3816 if (!r || TREE_CODE (r) != RECORD_TYPE)
3817 return 0;
3818 /* Make sure they are the same mode */
3819 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3820 return 0;
3821 /* If there are no following fields then the field alignment assures
fa7637bd 3822 the structure is padded to the alignment which means this field is
3823 padded too. */
644459d0 3824 if (TREE_CHAIN (t) == 0)
3825 return 1;
3826 /* If the following field is also aligned then this field will be
3827 padded. */
3828 t = TREE_CHAIN (t);
3829 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3830 return 1;
3831 return 0;
3832}
3833
c7b91b14 3834/* Parse the -mfixed-range= option string. */
3835static void
3836fix_range (const char *const_str)
3837{
3838 int i, first, last;
3839 char *str, *dash, *comma;
3840
3841  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3842     REG2 are either register names or register numbers.  The effect
3843     of this option is to mark the registers in the range from REG1 to
3844     REG2 as ``fixed'' so they won't be used by the compiler. */
3845
3846 i = strlen (const_str);
3847 str = (char *) alloca (i + 1);
3848 memcpy (str, const_str, i + 1);
3849
3850 while (1)
3851 {
3852 dash = strchr (str, '-');
3853 if (!dash)
3854 {
3855 warning (0, "value of -mfixed-range must have form REG1-REG2");
3856 return;
3857 }
3858 *dash = '\0';
3859 comma = strchr (dash + 1, ',');
3860 if (comma)
3861 *comma = '\0';
3862
3863 first = decode_reg_name (str);
3864 if (first < 0)
3865 {
3866 warning (0, "unknown register name: %s", str);
3867 return;
3868 }
3869
3870 last = decode_reg_name (dash + 1);
3871 if (last < 0)
3872 {
3873 warning (0, "unknown register name: %s", dash + 1);
3874 return;
3875 }
3876
3877 *dash = '-';
3878
3879 if (first > last)
3880 {
3881 warning (0, "%s-%s is an empty range", str, dash + 1);
3882 return;
3883 }
3884
3885 for (i = first; i <= last; ++i)
3886 fixed_regs[i] = call_used_regs[i] = 1;
3887
3888 if (!comma)
3889 break;
3890
3891 *comma = ',';
3892 str = comma + 1;
3893 }
3894}
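/* For example (illustrative only), -mfixed-range=80-85,120-127 marks
   registers 80..85 and 120..127 as fixed and call-used, keeping them out
   of register allocation.  */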
3895
644459d0 3896int
3897spu_valid_move (rtx * ops)
3898{
3899 enum machine_mode mode = GET_MODE (ops[0]);
3900 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3901 return 0;
3902
3903 /* init_expr_once tries to recog against load and store insns to set
3904 the direct_load[] and direct_store[] arrays. We always want to
3905 consider those loads and stores valid. init_expr_once is called in
3906 the context of a dummy function which does not have a decl. */
3907 if (cfun->decl == 0)
3908 return 1;
3909
3910  /* Don't allow loads/stores which would require more than 1 insn.
3911     During and after reload we assume loads and stores only take 1
3912     insn. */
3913 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3914 {
3915 if (GET_CODE (ops[0]) == MEM
3916 && (GET_MODE_SIZE (mode) < 4
3917 || !(store_with_one_insn_p (ops[0])
3918 || mem_is_padded_component_ref (ops[0]))))
3919 return 0;
3920 if (GET_CODE (ops[1]) == MEM
3921 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3922 return 0;
3923 }
3924 return 1;
3925}
3926
3927/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3928 can be generated using the fsmbi instruction. */
3929int
3930fsmbi_const_p (rtx x)
3931{
dea01258 3932 if (CONSTANT_P (x))
3933 {
5df189be 3934 /* We can always choose TImode for CONST_INT because the high bits
dea01258 3935 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 3936 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 3937 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 3938 }
3939 return 0;
3940}
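/* Illustrative example, not part of the port: fsmbi expands each bit of
   its 16-bit immediate into a byte of 0x00 or 0xff, so a constant such as
   the V4SImode vector { 0, -1, 0, -1 } is an FSMBI constant (immediate
   0x0f0f), while { 1, 2, 3, 4 } is not.  */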
3941
3942/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3943 can be generated using the cbd, chd, cwd or cdd instruction. */
3944int
3945cpat_const_p (rtx x, enum machine_mode mode)
3946{
3947 if (CONSTANT_P (x))
3948 {
3949 enum immediate_class c = classify_immediate (x, mode);
3950 return c == IC_CPAT;
3951 }
3952 return 0;
3953}
644459d0 3954
dea01258 3955rtx
3956gen_cpat_const (rtx * ops)
3957{
3958 unsigned char dst[16];
3959 int i, offset, shift, isize;
3960 if (GET_CODE (ops[3]) != CONST_INT
3961 || GET_CODE (ops[2]) != CONST_INT
3962 || (GET_CODE (ops[1]) != CONST_INT
3963 && GET_CODE (ops[1]) != REG))
3964 return 0;
3965 if (GET_CODE (ops[1]) == REG
3966 && (!REG_POINTER (ops[1])
3967 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3968 return 0;
644459d0 3969
3970 for (i = 0; i < 16; i++)
dea01258 3971 dst[i] = i + 16;
3972 isize = INTVAL (ops[3]);
3973 if (isize == 1)
3974 shift = 3;
3975 else if (isize == 2)
3976 shift = 2;
3977 else
3978 shift = 0;
3979 offset = (INTVAL (ops[2]) +
3980 (GET_CODE (ops[1]) ==
3981 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3982 for (i = 0; i < isize; i++)
3983 dst[offset + i] = i + shift;
3984 return array_to_constant (TImode, dst);
644459d0 3985}
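/* Illustrative sketch, not part of the port: the constant built above is
   the same shuffle control a cbd/chd/cwd/cdd instruction would produce.
   For a 4-byte insert at quadword offset 4 it is

       10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f

   i.e. keep the loaded quadword (selectors 0x10..0x1f) except bytes 4..7,
   which are taken from the preferred slot of the new value.  A standalone
   model of the computation, assuming OFFSET is already reduced mod 16 and
   naturally aligned for ISIZE as the callers guarantee:  */
#if 0
static void
cpat_control_sketch (unsigned char dst[16], int offset, int isize)
{
  int shift = isize == 1 ? 3 : isize == 2 ? 2 : 0;
  int i;
  for (i = 0; i < 16; i++)
    dst[i] = i + 16;		/* Select byte i of the loaded quadword.  */
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;	/* Select the value's preferred-slot bytes.  */
}
#endif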
3986
3987/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3988 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3989 than 16 bytes, the value is repeated across the rest of the array. */
3990void
3991constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3992{
3993 HOST_WIDE_INT val;
3994 int i, j, first;
3995
3996 memset (arr, 0, 16);
3997 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3998 if (GET_CODE (x) == CONST_INT
3999 || (GET_CODE (x) == CONST_DOUBLE
4000 && (mode == SFmode || mode == DFmode)))
4001 {
4002 gcc_assert (mode != VOIDmode && mode != BLKmode);
4003
4004 if (GET_CODE (x) == CONST_DOUBLE)
4005 val = const_double_to_hwint (x);
4006 else
4007 val = INTVAL (x);
4008 first = GET_MODE_SIZE (mode) - 1;
4009 for (i = first; i >= 0; i--)
4010 {
4011 arr[i] = val & 0xff;
4012 val >>= 8;
4013 }
4014 /* Splat the constant across the whole array. */
4015 for (j = 0, i = first + 1; i < 16; i++)
4016 {
4017 arr[i] = arr[j];
4018 j = (j == first) ? 0 : j + 1;
4019 }
4020 }
4021 else if (GET_CODE (x) == CONST_DOUBLE)
4022 {
4023 val = CONST_DOUBLE_LOW (x);
4024 for (i = 15; i >= 8; i--)
4025 {
4026 arr[i] = val & 0xff;
4027 val >>= 8;
4028 }
4029 val = CONST_DOUBLE_HIGH (x);
4030 for (i = 7; i >= 0; i--)
4031 {
4032 arr[i] = val & 0xff;
4033 val >>= 8;
4034 }
4035 }
4036 else if (GET_CODE (x) == CONST_VECTOR)
4037 {
4038 int units;
4039 rtx elt;
4040 mode = GET_MODE_INNER (mode);
4041 units = CONST_VECTOR_NUNITS (x);
4042 for (i = 0; i < units; i++)
4043 {
4044 elt = CONST_VECTOR_ELT (x, i);
4045 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4046 {
4047 if (GET_CODE (elt) == CONST_DOUBLE)
4048 val = const_double_to_hwint (elt);
4049 else
4050 val = INTVAL (elt);
4051 first = GET_MODE_SIZE (mode) - 1;
4052 if (first + i * GET_MODE_SIZE (mode) > 16)
4053 abort ();
4054 for (j = first; j >= 0; j--)
4055 {
4056 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4057 val >>= 8;
4058 }
4059 }
4060 }
4061 }
4062 else
4063 gcc_unreachable();
4064}
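/* Illustrative example, not part of the port: for the SImode constant
   0x12345678 the loop above stores the big-endian bytes 12 34 56 78 into
   arr[0..3], and the splat loop then repeats them, giving

       12 34 56 78 12 34 56 78 12 34 56 78 12 34 56 78

   A standalone model of the same splat for a 4-byte element:  */
#if 0
static void
splat_si_sketch (unsigned char arr[16], unsigned int val)
{
  int i;
  for (i = 3; i >= 0; i--)
    {
      arr[i] = val & 0xff;	/* Big-endian byte order.  */
      val >>= 8;
    }
  for (i = 4; i < 16; i++)
    arr[i] = arr[i - 4];	/* Repeat the element across the quadword.  */
}
#endif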
4065
4066/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4067 smaller than 16 bytes, use the bytes that would represent that value
4068 in a register, e.g., for QImode return the value of arr[3]. */
4069rtx
4070array_to_constant (enum machine_mode mode, unsigned char arr[16])
4071{
4072 enum machine_mode inner_mode;
4073 rtvec v;
4074 int units, size, i, j, k;
4075 HOST_WIDE_INT val;
4076
4077 if (GET_MODE_CLASS (mode) == MODE_INT
4078 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4079 {
4080 j = GET_MODE_SIZE (mode);
4081 i = j < 4 ? 4 - j : 0;
4082 for (val = 0; i < j; i++)
4083 val = (val << 8) | arr[i];
4084 val = trunc_int_for_mode (val, mode);
4085 return GEN_INT (val);
4086 }
4087
4088 if (mode == TImode)
4089 {
4090 HOST_WIDE_INT high;
4091 for (i = high = 0; i < 8; i++)
4092 high = (high << 8) | arr[i];
4093 for (i = 8, val = 0; i < 16; i++)
4094 val = (val << 8) | arr[i];
4095 return immed_double_const (val, high, TImode);
4096 }
4097 if (mode == SFmode)
4098 {
4099 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4100 val = trunc_int_for_mode (val, SImode);
171b6d22 4101 return hwint_to_const_double (SFmode, val);
644459d0 4102 }
4103 if (mode == DFmode)
4104 {
4105 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4106 val <<= 32;
4107 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4108 return hwint_to_const_double (DFmode, val);
644459d0 4109 }
4110
4111 if (!VECTOR_MODE_P (mode))
4112 abort ();
4113
4114 units = GET_MODE_NUNITS (mode);
4115 size = GET_MODE_UNIT_SIZE (mode);
4116 inner_mode = GET_MODE_INNER (mode);
4117 v = rtvec_alloc (units);
4118
4119 for (k = i = 0; i < units; ++i)
4120 {
4121 val = 0;
4122 for (j = 0; j < size; j++, k++)
4123 val = (val << 8) | arr[k];
4124
4125 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4126 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4127 else
4128 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4129 }
4130 if (k > 16)
4131 abort ();
4132
4133 return gen_rtx_CONST_VECTOR (mode, v);
4134}
4135
4136static void
4137reloc_diagnostic (rtx x)
4138{
4139 tree loc_decl, decl = 0;
4140 const char *msg;
4141 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4142 return;
4143
4144 if (GET_CODE (x) == SYMBOL_REF)
4145 decl = SYMBOL_REF_DECL (x);
4146 else if (GET_CODE (x) == CONST
4147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4148 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4149
4150 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4151 if (decl && !DECL_P (decl))
4152 decl = 0;
4153
4154 /* We use last_assemble_variable_decl to get line information. It's
4155 not always going to be right and might not even be close, but will
4156 be right for the more common cases. */
5df189be 4157 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4158 loc_decl = decl;
4159 else
4160 loc_decl = last_assemble_variable_decl;
4161
4162 /* The decl could be a string constant. */
4163 if (decl && DECL_P (decl))
4164 msg = "%Jcreating run-time relocation for %qD";
4165 else
4166 msg = "creating run-time relocation";
4167
99369027 4168 if (TARGET_WARN_RELOC)
644459d0 4169 warning (0, msg, loc_decl, decl);
99369027 4170 else
4171 error (msg, loc_decl, decl);
644459d0 4172}
4173
4174/* Hook into assemble_integer so we can generate an error for run-time
4175 relocations. The SPU ABI disallows them. */
4176static bool
4177spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4178{
4179 /* By default run-time relocations aren't supported, but we allow them
4180     in case users support them in their own run-time loader.  And we provide
4181 a warning for those users that don't. */
4182 if ((GET_CODE (x) == SYMBOL_REF)
4183 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4184 reloc_diagnostic (x);
4185
4186 return default_assemble_integer (x, size, aligned_p);
4187}
4188
4189static void
4190spu_asm_globalize_label (FILE * file, const char *name)
4191{
4192 fputs ("\t.global\t", file);
4193 assemble_name (file, name);
4194 fputs ("\n", file);
4195}
4196
4197static bool
4198spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4199{
4200 enum machine_mode mode = GET_MODE (x);
4201 int cost = COSTS_N_INSNS (2);
4202
4203 /* Folding to a CONST_VECTOR will use extra space but there might
4204 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4205 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4206 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4207 because this cost will only be compared against a single insn.
4208 if (code == CONST_VECTOR)
4209 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4210 */
4211
4212 /* Use defaults for float operations. Not accurate but good enough. */
4213 if (mode == DFmode)
4214 {
4215 *total = COSTS_N_INSNS (13);
4216 return true;
4217 }
4218 if (mode == SFmode)
4219 {
4220 *total = COSTS_N_INSNS (6);
4221 return true;
4222 }
4223 switch (code)
4224 {
4225 case CONST_INT:
4226 if (satisfies_constraint_K (x))
4227 *total = 0;
4228 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4229 *total = COSTS_N_INSNS (1);
4230 else
4231 *total = COSTS_N_INSNS (3);
4232 return true;
4233
4234 case CONST:
4235 *total = COSTS_N_INSNS (3);
4236 return true;
4237
4238 case LABEL_REF:
4239 case SYMBOL_REF:
4240 *total = COSTS_N_INSNS (0);
4241 return true;
4242
4243 case CONST_DOUBLE:
4244 *total = COSTS_N_INSNS (5);
4245 return true;
4246
4247 case FLOAT_EXTEND:
4248 case FLOAT_TRUNCATE:
4249 case FLOAT:
4250 case UNSIGNED_FLOAT:
4251 case FIX:
4252 case UNSIGNED_FIX:
4253 *total = COSTS_N_INSNS (7);
4254 return true;
4255
4256 case PLUS:
4257 if (mode == TImode)
4258 {
4259 *total = COSTS_N_INSNS (9);
4260 return true;
4261 }
4262 break;
4263
4264 case MULT:
4265 cost =
4266 GET_CODE (XEXP (x, 0)) ==
4267 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4268 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4269 {
4270 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4271 {
4272 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4273 cost = COSTS_N_INSNS (14);
4274 if ((val & 0xffff) == 0)
4275 cost = COSTS_N_INSNS (9);
4276 else if (val > 0 && val < 0x10000)
4277 cost = COSTS_N_INSNS (11);
4278 }
4279 }
4280 *total = cost;
4281 return true;
4282 case DIV:
4283 case UDIV:
4284 case MOD:
4285 case UMOD:
4286 *total = COSTS_N_INSNS (20);
4287 return true;
4288 case ROTATE:
4289 case ROTATERT:
4290 case ASHIFT:
4291 case ASHIFTRT:
4292 case LSHIFTRT:
4293 *total = COSTS_N_INSNS (4);
4294 return true;
4295 case UNSPEC:
4296 if (XINT (x, 1) == UNSPEC_CONVERT)
4297 *total = COSTS_N_INSNS (0);
4298 else
4299 *total = COSTS_N_INSNS (4);
4300 return true;
4301 }
4302 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4303 if (GET_MODE_CLASS (mode) == MODE_INT
4304 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4305 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4306 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4307 *total = cost;
4308 return true;
4309}
4310
1bd43494 4311static enum machine_mode
4312spu_unwind_word_mode (void)
644459d0 4313{
1bd43494 4314 return SImode;
644459d0 4315}
4316
4317/* Decide whether we can make a sibling call to a function. DECL is the
4318 declaration of the function being targeted by the call and EXP is the
4319 CALL_EXPR representing the call. */
4320static bool
4321spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4322{
4323 return decl && !TARGET_LARGE_MEM;
4324}
4325
4326/* We need to correctly update the back chain pointer and the Available
4327   Stack Size (which is in the second slot of the sp register). */
4328void
4329spu_allocate_stack (rtx op0, rtx op1)
4330{
4331 HOST_WIDE_INT v;
4332 rtx chain = gen_reg_rtx (V4SImode);
4333 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4334 rtx sp = gen_reg_rtx (V4SImode);
4335 rtx splatted = gen_reg_rtx (V4SImode);
4336 rtx pat = gen_reg_rtx (TImode);
4337
4338 /* copy the back chain so we can save it back again. */
4339 emit_move_insn (chain, stack_bot);
4340
4341 op1 = force_reg (SImode, op1);
4342
4343 v = 0x1020300010203ll;
4344 emit_move_insn (pat, immed_double_const (v, v, TImode));
4345 emit_insn (gen_shufb (splatted, op1, op1, pat));
4346
4347 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4348 emit_insn (gen_subv4si3 (sp, sp, splatted));
4349
4350 if (flag_stack_check)
4351 {
4352 rtx avail = gen_reg_rtx(SImode);
4353 rtx result = gen_reg_rtx(SImode);
4354 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4355 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4356 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4357 }
4358
4359 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4360
4361 emit_move_insn (stack_bot, chain);
4362
4363 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4364}
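/* Note on the shuffle constant used above (a reading of the code, not a
   separate specification): the TImode pattern 00 01 02 03 repeated four
   times selects the low word of op1 into every word slot, i.e. it splats
   the requested allocation size.  The V4SImode subtract then decrements
   both word 0 of $sp (the stack pointer itself) and word 1 (the Available
   Stack Size) by that amount in a single instruction.  */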
4365
4366void
4367spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4368{
4369 static unsigned char arr[16] =
4370 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4371 rtx temp = gen_reg_rtx (SImode);
4372 rtx temp2 = gen_reg_rtx (SImode);
4373 rtx temp3 = gen_reg_rtx (V4SImode);
4374 rtx temp4 = gen_reg_rtx (V4SImode);
4375 rtx pat = gen_reg_rtx (TImode);
4376 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4377
4378 /* Restore the backchain from the first word, sp from the second. */
4379 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4380 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4381
4382 emit_move_insn (pat, array_to_constant (TImode, arr));
4383
4384 /* Compute Available Stack Size for sp */
4385 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4386 emit_insn (gen_shufb (temp3, temp, temp, pat));
4387
4388 /* Compute Available Stack Size for back chain */
4389 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4390 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4391 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4392
4393 emit_insn (gen_addv4si3 (sp, sp, temp3));
4394 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4395}
4396
4397static void
4398spu_init_libfuncs (void)
4399{
4400 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4401 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4402 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4403 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4404 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4405 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4406 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4407 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4408 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4409 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4410 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4411
4412 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4413 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 4414
4415 set_optab_libfunc (smul_optab, TImode, "__multi3");
4416 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4417 set_optab_libfunc (smod_optab, TImode, "__modti3");
4418 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4419 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4420 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 4421}
4422
4423/* Make a subreg, stripping any existing subreg. We could possibly just
4424 call simplify_subreg, but in this case we know what we want. */
4425rtx
4426spu_gen_subreg (enum machine_mode mode, rtx x)
4427{
4428 if (GET_CODE (x) == SUBREG)
4429 x = SUBREG_REG (x);
4430 if (GET_MODE (x) == mode)
4431 return x;
4432 return gen_rtx_SUBREG (mode, x, 0);
4433}
4434
4435static bool
fb80456a 4436spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 4437{
4438 return (TYPE_MODE (type) == BLKmode
4439 && ((type) == 0
4440 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4441 || int_size_in_bytes (type) >
4442 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4443}
4444\f
4445/* Create the built-in types and functions */
4446
4447struct spu_builtin_description spu_builtins[] = {
4448#define DEF_BUILTIN(fcode, icode, name, type, params) \
4449 {fcode, icode, name, type, params, NULL_TREE},
4450#include "spu-builtins.def"
4451#undef DEF_BUILTIN
4452};
4453
4454static void
4455spu_init_builtins (void)
4456{
4457 struct spu_builtin_description *d;
4458 unsigned int i;
4459
4460 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4461 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4462 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4463 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4464 V4SF_type_node = build_vector_type (float_type_node, 4);
4465 V2DF_type_node = build_vector_type (double_type_node, 2);
4466
4467 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4468 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4469 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4470 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4471
c4ecce0c 4472 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 4473
4474 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4475 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4476 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4477 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4478 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4479 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4480 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4481 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4482 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4483 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4484 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4485 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4486
4487 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4488 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4489 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4490 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4491 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4492 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4493 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4495
4496 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4497 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4498
4499 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4500
4501 spu_builtin_types[SPU_BTI_PTR] =
4502 build_pointer_type (build_qualified_type
4503 (void_type_node,
4504 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4505
4506 /* For each builtin we build a new prototype. The tree code will make
4507 sure nodes are shared. */
4508 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4509 {
4510 tree p;
4511 char name[64]; /* build_function will make a copy. */
4512 int parm;
4513
4514 if (d->name == 0)
4515 continue;
4516
5dfbd18f 4517 /* Find last parm. */
644459d0 4518 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 4519 ;
644459d0 4520
4521 p = void_list_node;
4522 while (parm > 1)
4523 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4524
4525 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4526
4527 sprintf (name, "__builtin_%s", d->name);
4528 d->fndecl =
4529 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4530 NULL, NULL_TREE);
a76866d3 4531 if (d->fcode == SPU_MASK_FOR_LOAD)
4532 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 4533
4534 /* These builtins don't throw. */
4535 TREE_NOTHROW (d->fndecl) = 1;
644459d0 4536 }
4537}
4538
cf31d486 4539void
4540spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4541{
4542 static unsigned char arr[16] =
4543 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4544
4545 rtx temp = gen_reg_rtx (Pmode);
4546 rtx temp2 = gen_reg_rtx (V4SImode);
4547 rtx temp3 = gen_reg_rtx (V4SImode);
4548 rtx pat = gen_reg_rtx (TImode);
4549 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4550
4551 emit_move_insn (pat, array_to_constant (TImode, arr));
4552
4553 /* Restore the sp. */
4554 emit_move_insn (temp, op1);
4555 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4556
4557 /* Compute available stack size for sp. */
4558 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4559 emit_insn (gen_shufb (temp3, temp, temp, pat));
4560
4561 emit_insn (gen_addv4si3 (sp, sp, temp3));
4562 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4563}
4564
644459d0 4565int
4566spu_safe_dma (HOST_WIDE_INT channel)
4567{
006e4b96 4568 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 4569}
4570
4571void
4572spu_builtin_splats (rtx ops[])
4573{
4574 enum machine_mode mode = GET_MODE (ops[0]);
4575 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4576 {
4577 unsigned char arr[16];
4578 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4579 emit_move_insn (ops[0], array_to_constant (mode, arr));
4580 }
644459d0 4581 else
4582 {
4583 rtx reg = gen_reg_rtx (TImode);
4584 rtx shuf;
4585 if (GET_CODE (ops[1]) != REG
4586 && GET_CODE (ops[1]) != SUBREG)
4587 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4588 switch (mode)
4589 {
4590 case V2DImode:
4591 case V2DFmode:
4592 shuf =
4593 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4594 TImode);
4595 break;
4596 case V4SImode:
4597 case V4SFmode:
4598 shuf =
4599 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4600 TImode);
4601 break;
4602 case V8HImode:
4603 shuf =
4604 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4605 TImode);
4606 break;
4607 case V16QImode:
4608 shuf =
4609 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4610 TImode);
4611 break;
4612 default:
4613 abort ();
4614 }
4615 emit_move_insn (reg, shuf);
4616 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4617 }
4618}
4619
4620void
4621spu_builtin_extract (rtx ops[])
4622{
4623 enum machine_mode mode;
4624 rtx rot, from, tmp;
4625
4626 mode = GET_MODE (ops[1]);
4627
4628 if (GET_CODE (ops[2]) == CONST_INT)
4629 {
4630 switch (mode)
4631 {
4632 case V16QImode:
4633 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4634 break;
4635 case V8HImode:
4636 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4637 break;
4638 case V4SFmode:
4639 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4640 break;
4641 case V4SImode:
4642 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4643 break;
4644 case V2DImode:
4645 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4646 break;
4647 case V2DFmode:
4648 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4649 break;
4650 default:
4651 abort ();
4652 }
4653 return;
4654 }
4655
4656 from = spu_gen_subreg (TImode, ops[1]);
4657 rot = gen_reg_rtx (TImode);
4658 tmp = gen_reg_rtx (SImode);
4659
4660 switch (mode)
4661 {
4662 case V16QImode:
4663 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4664 break;
4665 case V8HImode:
4666 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4667 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4668 break;
4669 case V4SFmode:
4670 case V4SImode:
4671 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4672 break;
4673 case V2DImode:
4674 case V2DFmode:
4675 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4676 break;
4677 default:
4678 abort ();
4679 }
4680 emit_insn (gen_rotqby_ti (rot, from, tmp));
4681
4682 emit_insn (gen_spu_convert (ops[0], rot));
4683}
4684
4685void
4686spu_builtin_insert (rtx ops[])
4687{
4688 enum machine_mode mode = GET_MODE (ops[0]);
4689 enum machine_mode imode = GET_MODE_INNER (mode);
4690 rtx mask = gen_reg_rtx (TImode);
4691 rtx offset;
4692
4693 if (GET_CODE (ops[3]) == CONST_INT)
4694 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4695 else
4696 {
4697 offset = gen_reg_rtx (SImode);
4698 emit_insn (gen_mulsi3
4699 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4700 }
4701 emit_insn (gen_cpat
4702 (mask, stack_pointer_rtx, offset,
4703 GEN_INT (GET_MODE_SIZE (imode))));
4704 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4705}
4706
4707void
4708spu_builtin_promote (rtx ops[])
4709{
4710 enum machine_mode mode, imode;
4711 rtx rot, from, offset;
4712 HOST_WIDE_INT pos;
4713
4714 mode = GET_MODE (ops[0]);
4715 imode = GET_MODE_INNER (mode);
4716
4717 from = gen_reg_rtx (TImode);
4718 rot = spu_gen_subreg (TImode, ops[0]);
4719
4720 emit_insn (gen_spu_convert (from, ops[1]));
4721
4722 if (GET_CODE (ops[2]) == CONST_INT)
4723 {
4724 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4725 if (GET_MODE_SIZE (imode) < 4)
4726 pos += 4 - GET_MODE_SIZE (imode);
4727 offset = GEN_INT (pos & 15);
4728 }
4729 else
4730 {
4731 offset = gen_reg_rtx (SImode);
4732 switch (mode)
4733 {
4734 case V16QImode:
4735 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4736 break;
4737 case V8HImode:
4738 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4739 emit_insn (gen_addsi3 (offset, offset, offset));
4740 break;
4741 case V4SFmode:
4742 case V4SImode:
4743 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4744 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4745 break;
4746 case V2DImode:
4747 case V2DFmode:
4748 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4749 break;
4750 default:
4751 abort ();
4752 }
4753 }
4754 emit_insn (gen_rotqby_ti (rot, from, offset));
4755}
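/* Illustrative sketch, not part of the port: promoting a scalar into
   element 2 of a V4SImode vector computes pos = -4 * 2 = -8, so the
   rotate count is (-8) & 15 = 8, and the rotqby by 8 bytes moves the
   value from the preferred slot (bytes 0..3) into bytes 8..11, i.e.
   element 2.  A standalone model of the constant-index case:  */
#if 0
static int
promote_rot_offset_sketch (int elt_size, int elt_index)
{
  int pos = -elt_size * elt_index;
  if (elt_size < 4)
    pos += 4 - elt_size;	/* Sub-word scalars sit at the end of word 0.  */
  return pos & 15;		/* Byte count for the rotqby.  */
}
#endif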
4756
4757void
4758spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4759{
4760 rtx shuf = gen_reg_rtx (V4SImode);
4761 rtx insn = gen_reg_rtx (V4SImode);
4762 rtx shufc;
4763 rtx insnc;
4764 rtx mem;
4765
4766 fnaddr = force_reg (SImode, fnaddr);
4767 cxt = force_reg (SImode, cxt);
4768
4769 if (TARGET_LARGE_MEM)
4770 {
4771 rtx rotl = gen_reg_rtx (V4SImode);
4772 rtx mask = gen_reg_rtx (V4SImode);
4773 rtx bi = gen_reg_rtx (SImode);
4774 unsigned char shufa[16] = {
4775 2, 3, 0, 1, 18, 19, 16, 17,
4776 0, 1, 2, 3, 16, 17, 18, 19
4777 };
4778 unsigned char insna[16] = {
4779 0x41, 0, 0, 79,
4780 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4781 0x60, 0x80, 0, 79,
4782 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4783 };
4784
4785 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4786 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4787
4788 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 4789 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 4790 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4791 emit_insn (gen_selb (insn, insnc, rotl, mask));
4792
4793 mem = memory_address (Pmode, tramp);
4794 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4795
4796 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4797 mem = memory_address (Pmode, plus_constant (tramp, 16));
4798 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4799 }
4800 else
4801 {
4802 rtx scxt = gen_reg_rtx (SImode);
4803 rtx sfnaddr = gen_reg_rtx (SImode);
4804 unsigned char insna[16] = {
4805 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4806 0x30, 0, 0, 0,
4807 0, 0, 0, 0,
4808 0, 0, 0, 0
4809 };
4810
4811 shufc = gen_reg_rtx (TImode);
4812 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4813
4814 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4815 fits 18 bits and the last 4 are zeros. This will be true if
4816 the stack pointer is initialized to 0x3fff0 at program start,
4817 otherwise the ila instruction will be garbage. */
4818
4819 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4820 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4821 emit_insn (gen_cpat
4822 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4823 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4824 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4825
4826 mem = memory_address (Pmode, tramp);
4827 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4828
4829 }
4830 emit_insn (gen_sync ());
4831}
4832
4833void
4834spu_expand_sign_extend (rtx ops[])
4835{
4836 unsigned char arr[16];
4837 rtx pat = gen_reg_rtx (TImode);
4838 rtx sign, c;
4839 int i, last;
4840 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4841 if (GET_MODE (ops[1]) == QImode)
4842 {
4843 sign = gen_reg_rtx (HImode);
4844 emit_insn (gen_extendqihi2 (sign, ops[1]));
4845 for (i = 0; i < 16; i++)
4846 arr[i] = 0x12;
4847 arr[last] = 0x13;
4848 }
4849 else
4850 {
4851 for (i = 0; i < 16; i++)
4852 arr[i] = 0x10;
4853 switch (GET_MODE (ops[1]))
4854 {
4855 case HImode:
4856 sign = gen_reg_rtx (SImode);
4857 emit_insn (gen_extendhisi2 (sign, ops[1]));
4858 arr[last] = 0x03;
4859 arr[last - 1] = 0x02;
4860 break;
4861 case SImode:
4862 sign = gen_reg_rtx (SImode);
4863 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4864 for (i = 0; i < 4; i++)
4865 arr[last - i] = 3 - i;
4866 break;
4867 case DImode:
4868 sign = gen_reg_rtx (SImode);
4869 c = gen_reg_rtx (SImode);
4870 emit_insn (gen_spu_convert (c, ops[1]));
4871 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4872 for (i = 0; i < 8; i++)
4873 arr[last - i] = 7 - i;
4874 break;
4875 default:
4876 abort ();
4877 }
4878 }
4879 emit_move_insn (pat, array_to_constant (TImode, arr));
4880 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4881}
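/* Illustrative example, not part of the port: for an SImode source and a
   DImode destination the pattern built above is

       10 10 10 10 00 01 02 03 10 10 10 10 10 10 10 10

   so the shufb fills bytes 0..3 of the result from byte 0 of the sign
   word (all sign bits) and bytes 4..7 from the original SImode value,
   leaving the sign-extended DImode value in the preferred slot; bytes
   8..15 don't matter for DImode.  */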
4882
4883/* Expand vector initialization.  If there are any constant parts,
4884 load constant parts first. Then load any non-constant parts. */
4885void
4886spu_expand_vector_init (rtx target, rtx vals)
4887{
4888 enum machine_mode mode = GET_MODE (target);
4889 int n_elts = GET_MODE_NUNITS (mode);
4890 int n_var = 0;
4891 bool all_same = true;
790c536c 4892 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4893 int i;
4894
4895 first = XVECEXP (vals, 0, 0);
4896 for (i = 0; i < n_elts; ++i)
4897 {
4898 x = XVECEXP (vals, 0, i);
e442af0b 4899 if (!(CONST_INT_P (x)
4900 || GET_CODE (x) == CONST_DOUBLE
4901 || GET_CODE (x) == CONST_FIXED))
644459d0 4902 ++n_var;
4903 else
4904 {
4905 if (first_constant == NULL_RTX)
4906 first_constant = x;
4907 }
4908 if (i > 0 && !rtx_equal_p (x, first))
4909 all_same = false;
4910 }
4911
4912 /* if all elements are the same, use splats to repeat elements */
4913 if (all_same)
4914 {
4915 if (!CONSTANT_P (first)
4916 && !register_operand (first, GET_MODE (x)))
4917 first = force_reg (GET_MODE (first), first);
4918 emit_insn (gen_spu_splats (target, first));
4919 return;
4920 }
4921
4922 /* load constant parts */
4923 if (n_var != n_elts)
4924 {
4925 if (n_var == 0)
4926 {
4927 emit_move_insn (target,
4928 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4929 }
4930 else
4931 {
4932 rtx constant_parts_rtx = copy_rtx (vals);
4933
4934 gcc_assert (first_constant != NULL_RTX);
4935	  /* Fill empty slots with the first constant; this increases
4936	     our chance of using splats in the recursive call below. */
4937 for (i = 0; i < n_elts; ++i)
e442af0b 4938 {
4939 x = XVECEXP (constant_parts_rtx, 0, i);
4940 if (!(CONST_INT_P (x)
4941 || GET_CODE (x) == CONST_DOUBLE
4942 || GET_CODE (x) == CONST_FIXED))
4943 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4944 }
644459d0 4945
4946 spu_expand_vector_init (target, constant_parts_rtx);
4947 }
4948 }
4949
4950 /* load variable parts */
4951 if (n_var != 0)
4952 {
4953 rtx insert_operands[4];
4954
4955 insert_operands[0] = target;
4956 insert_operands[2] = target;
4957 for (i = 0; i < n_elts; ++i)
4958 {
4959 x = XVECEXP (vals, 0, i);
e442af0b 4960 if (!(CONST_INT_P (x)
4961 || GET_CODE (x) == CONST_DOUBLE
4962 || GET_CODE (x) == CONST_FIXED))
644459d0 4963 {
4964 if (!register_operand (x, GET_MODE (x)))
4965 x = force_reg (GET_MODE (x), x);
4966 insert_operands[1] = x;
4967 insert_operands[3] = GEN_INT (i);
4968 spu_builtin_insert (insert_operands);
4969 }
4970 }
4971 }
4972}
6352eedf 4973
5474166e 4974/* Return insn index for the vector compare instruction for given CODE,
4975 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4976
4977static int
4978get_vec_cmp_insn (enum rtx_code code,
4979 enum machine_mode dest_mode,
4980 enum machine_mode op_mode)
4981
4982{
4983 switch (code)
4984 {
4985 case EQ:
4986 if (dest_mode == V16QImode && op_mode == V16QImode)
4987 return CODE_FOR_ceq_v16qi;
4988 if (dest_mode == V8HImode && op_mode == V8HImode)
4989 return CODE_FOR_ceq_v8hi;
4990 if (dest_mode == V4SImode && op_mode == V4SImode)
4991 return CODE_FOR_ceq_v4si;
4992 if (dest_mode == V4SImode && op_mode == V4SFmode)
4993 return CODE_FOR_ceq_v4sf;
4994 if (dest_mode == V2DImode && op_mode == V2DFmode)
4995 return CODE_FOR_ceq_v2df;
4996 break;
4997 case GT:
4998 if (dest_mode == V16QImode && op_mode == V16QImode)
4999 return CODE_FOR_cgt_v16qi;
5000 if (dest_mode == V8HImode && op_mode == V8HImode)
5001 return CODE_FOR_cgt_v8hi;
5002 if (dest_mode == V4SImode && op_mode == V4SImode)
5003 return CODE_FOR_cgt_v4si;
5004 if (dest_mode == V4SImode && op_mode == V4SFmode)
5005 return CODE_FOR_cgt_v4sf;
5006 if (dest_mode == V2DImode && op_mode == V2DFmode)
5007 return CODE_FOR_cgt_v2df;
5008 break;
5009 case GTU:
5010 if (dest_mode == V16QImode && op_mode == V16QImode)
5011 return CODE_FOR_clgt_v16qi;
5012 if (dest_mode == V8HImode && op_mode == V8HImode)
5013 return CODE_FOR_clgt_v8hi;
5014 if (dest_mode == V4SImode && op_mode == V4SImode)
5015 return CODE_FOR_clgt_v4si;
5016 break;
5017 default:
5018 break;
5019 }
5020 return -1;
5021}
5022
5023/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5024 DMODE is expected destination mode. This is a recursive function. */
5025
5026static rtx
5027spu_emit_vector_compare (enum rtx_code rcode,
5028 rtx op0, rtx op1,
5029 enum machine_mode dmode)
5030{
5031 int vec_cmp_insn;
5032 rtx mask;
5033 enum machine_mode dest_mode;
5034 enum machine_mode op_mode = GET_MODE (op1);
5035
5036 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5037
5038  /* Floating point vector compare instructions use destination mode V4SImode.
5039     Double floating point vector compare instructions use destination mode V2DImode.
5040     Move the destination to the appropriate mode later. */
5041 if (dmode == V4SFmode)
5042 dest_mode = V4SImode;
5043 else if (dmode == V2DFmode)
5044 dest_mode = V2DImode;
5045 else
5046 dest_mode = dmode;
5047
5048 mask = gen_reg_rtx (dest_mode);
5049 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5050
5051 if (vec_cmp_insn == -1)
5052 {
5053 bool swap_operands = false;
5054 bool try_again = false;
5055 switch (rcode)
5056 {
5057 case LT:
5058 rcode = GT;
5059 swap_operands = true;
5060 try_again = true;
5061 break;
5062 case LTU:
5063 rcode = GTU;
5064 swap_operands = true;
5065 try_again = true;
5066 break;
5067 case NE:
5068 /* Treat A != B as ~(A==B). */
5069 {
5070 enum insn_code nor_code;
5071 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5072 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5073 gcc_assert (nor_code != CODE_FOR_nothing);
5074 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5075 if (dmode != dest_mode)
5076 {
5077 rtx temp = gen_reg_rtx (dest_mode);
5078 convert_move (temp, mask, 0);
5079 return temp;
5080 }
5081 return mask;
5082 }
5083 break;
5084 case GE:
5085 case GEU:
5086 case LE:
5087 case LEU:
5088 /* Try GT/GTU/LT/LTU OR EQ */
5089 {
5090 rtx c_rtx, eq_rtx;
5091 enum insn_code ior_code;
5092 enum rtx_code new_code;
5093
5094 switch (rcode)
5095 {
5096 case GE: new_code = GT; break;
5097 case GEU: new_code = GTU; break;
5098 case LE: new_code = LT; break;
5099 case LEU: new_code = LTU; break;
5100 default:
5101 gcc_unreachable ();
5102 }
5103
5104 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5105 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5106
99bdde56 5107 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5108 gcc_assert (ior_code != CODE_FOR_nothing);
5109 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5110 if (dmode != dest_mode)
5111 {
5112 rtx temp = gen_reg_rtx (dest_mode);
5113 convert_move (temp, mask, 0);
5114 return temp;
5115 }
5116 return mask;
5117 }
5118 break;
5119 default:
5120 gcc_unreachable ();
5121 }
5122
5123 /* You only get two chances. */
5124 if (try_again)
5125 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5126
5127 gcc_assert (vec_cmp_insn != -1);
5128
5129 if (swap_operands)
5130 {
5131 rtx tmp;
5132 tmp = op0;
5133 op0 = op1;
5134 op1 = tmp;
5135 }
5136 }
5137
5138 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5139 if (dmode != dest_mode)
5140 {
5141 rtx temp = gen_reg_rtx (dest_mode);
5142 convert_move (temp, mask, 0);
5143 return temp;
5144 }
5145 return mask;
5146}
5147
5148
5149/* Emit vector conditional expression.
5150 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5151 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5152
5153int
5154spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5155 rtx cond, rtx cc_op0, rtx cc_op1)
5156{
5157 enum machine_mode dest_mode = GET_MODE (dest);
5158 enum rtx_code rcode = GET_CODE (cond);
5159 rtx mask;
5160
5161 /* Get the vector mask for the given relational operations. */
5162 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5163
5164 emit_insn(gen_selb (dest, op2, op1, mask));
5165
5166 return 1;
5167}
5168
6352eedf 5169static rtx
5170spu_force_reg (enum machine_mode mode, rtx op)
5171{
5172 rtx x, r;
5173 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5174 {
5175 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5176 || GET_MODE (op) == BLKmode)
5177 return force_reg (mode, convert_to_mode (mode, op, 0));
5178 abort ();
5179 }
5180
5181 r = force_reg (GET_MODE (op), op);
5182 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5183 {
5184 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5185 if (x)
5186 return x;
5187 }
5188
5189 x = gen_reg_rtx (mode);
5190 emit_insn (gen_spu_convert (x, r));
5191 return x;
5192}
5193
5194static void
5195spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5196{
5197 HOST_WIDE_INT v = 0;
5198 int lsbits;
5199 /* Check the range of immediate operands. */
5200 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5201 {
5202 int range = p - SPU_BTI_7;
5df189be 5203
5204 if (!CONSTANT_P (op))
6352eedf 5205 error ("%s expects an integer literal in the range [%d, %d].",
5206 d->name,
5207 spu_builtin_range[range].low, spu_builtin_range[range].high);
5208
5209 if (GET_CODE (op) == CONST
5210 && (GET_CODE (XEXP (op, 0)) == PLUS
5211 || GET_CODE (XEXP (op, 0)) == MINUS))
5212 {
5213 v = INTVAL (XEXP (XEXP (op, 0), 1));
5214 op = XEXP (XEXP (op, 0), 0);
5215 }
5216 else if (GET_CODE (op) == CONST_INT)
5217 v = INTVAL (op);
5df189be 5218 else if (GET_CODE (op) == CONST_VECTOR
5219 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5220 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5221
5222 /* The default for v is 0 which is valid in every range. */
5223 if (v < spu_builtin_range[range].low
5224 || v > spu_builtin_range[range].high)
5225 error ("%s expects an integer literal in the range [%d, %d]. ("
5226 HOST_WIDE_INT_PRINT_DEC ")",
5227 d->name,
5228 spu_builtin_range[range].low, spu_builtin_range[range].high,
5229 v);
6352eedf 5230
5231 switch (p)
5232 {
5233 case SPU_BTI_S10_4:
5234 lsbits = 4;
5235 break;
5236 case SPU_BTI_U16_2:
5237	  /* This is only used in lqa and stqa.  Even though the insns
5238 encode 16 bits of the address (all but the 2 least
5239 significant), only 14 bits are used because it is masked to
5240 be 16 byte aligned. */
5241 lsbits = 4;
5242 break;
5243 case SPU_BTI_S16_2:
5244 /* This is used for lqr and stqr. */
5245 lsbits = 2;
5246 break;
5247 default:
5248 lsbits = 0;
5249 }
5250
5251 if (GET_CODE (op) == LABEL_REF
5252 || (GET_CODE (op) == SYMBOL_REF
5253 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5254 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5255 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5256 d->name);
5257 }
5258}
5259
5260
5261static void
5df189be 5262expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5263 rtx target, rtx ops[])
5264{
5265 enum insn_code icode = d->icode;
5df189be 5266 int i = 0, a;
6352eedf 5267
5268 /* Expand the arguments into rtl. */
5269
5270 if (d->parm[0] != SPU_BTI_VOID)
5271 ops[i++] = target;
5272
5df189be 5273 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5274 {
5df189be 5275 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5276 if (arg == 0)
5277 abort ();
5278 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5279 }
5280}
5281
5282static rtx
5283spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5284 tree exp, rtx target)
6352eedf 5285{
5286 rtx pat;
5287 rtx ops[8];
5288 enum insn_code icode = d->icode;
5289 enum machine_mode mode, tmode;
5290 int i, p;
5291 tree return_type;
5292
5293 /* Set up ops[] with values from arglist. */
5df189be 5294 expand_builtin_args (d, exp, target, ops);
6352eedf 5295
5296 /* Handle the target operand which must be operand 0. */
5297 i = 0;
5298 if (d->parm[0] != SPU_BTI_VOID)
5299 {
5300
5301	/* We prefer the mode specified for the match_operand; otherwise
5302 use the mode from the builtin function prototype. */
5303 tmode = insn_data[d->icode].operand[0].mode;
5304 if (tmode == VOIDmode)
5305 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5306
5307	/* Try to use target, because not using it can lead to extra copies,
5308	   and when we are using all of the registers, extra copies lead
5309	   to extra spills. */
5310 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5311 ops[0] = target;
5312 else
5313 target = ops[0] = gen_reg_rtx (tmode);
5314
5315 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5316 abort ();
5317
5318 i++;
5319 }
5320
a76866d3 5321 if (d->fcode == SPU_MASK_FOR_LOAD)
5322 {
5323 enum machine_mode mode = insn_data[icode].operand[1].mode;
5324 tree arg;
5325 rtx addr, op, pat;
5326
5327 /* get addr */
5df189be 5328 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5329 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5330 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5331 addr = memory_address (mode, op);
5332
5333 /* negate addr */
5334 op = gen_reg_rtx (GET_MODE (addr));
5335 emit_insn (gen_rtx_SET (VOIDmode, op,
5336 gen_rtx_NEG (GET_MODE (addr), addr)));
5337 op = gen_rtx_MEM (mode, op);
5338
5339 pat = GEN_FCN (icode) (target, op);
5340 if (!pat)
5341 return 0;
5342 emit_insn (pat);
5343 return target;
5344 }
5345
6352eedf 5346  /* Ignore align_hint, but still expand its args in case they have
5347 side effects. */
5348 if (icode == CODE_FOR_spu_align_hint)
5349 return 0;
5350
5351 /* Handle the rest of the operands. */
5352 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5353 {
5354 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5355 mode = insn_data[d->icode].operand[i].mode;
5356 else
5357 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5358
5359 /* mode can be VOIDmode here for labels */
5360
5361 /* For specific intrinsics with an immediate operand, e.g.,
5362 si_ai(), we sometimes need to convert the scalar argument to a
5363 vector argument by splatting the scalar. */
5364 if (VECTOR_MODE_P (mode)
5365 && (GET_CODE (ops[i]) == CONST_INT
5366 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5367 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5368 {
5369 if (GET_CODE (ops[i]) == CONST_INT)
5370 ops[i] = spu_const (mode, INTVAL (ops[i]));
5371 else
5372 {
5373 rtx reg = gen_reg_rtx (mode);
5374 enum machine_mode imode = GET_MODE_INNER (mode);
5375 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5376 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5377 if (imode != GET_MODE (ops[i]))
5378 ops[i] = convert_to_mode (imode, ops[i],
5379 TYPE_UNSIGNED (spu_builtin_types
5380 [d->parm[i]]));
5381 emit_insn (gen_spu_splats (reg, ops[i]));
5382 ops[i] = reg;
5383 }
5384 }
5385
5df189be 5386 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5387
6352eedf 5388 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5389 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 5390 }
5391
5392 switch (insn_data[icode].n_operands)
5393 {
5394 case 0:
5395 pat = GEN_FCN (icode) (0);
5396 break;
5397 case 1:
5398 pat = GEN_FCN (icode) (ops[0]);
5399 break;
5400 case 2:
5401 pat = GEN_FCN (icode) (ops[0], ops[1]);
5402 break;
5403 case 3:
5404 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5405 break;
5406 case 4:
5407 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5408 break;
5409 case 5:
5410 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5411 break;
5412 case 6:
5413 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5414 break;
5415 default:
5416 abort ();
5417 }
5418
5419 if (!pat)
5420 abort ();
5421
5422 if (d->type == B_CALL || d->type == B_BISLED)
5423 emit_call_insn (pat);
5424 else if (d->type == B_JUMP)
5425 {
5426 emit_jump_insn (pat);
5427 emit_barrier ();
5428 }
5429 else
5430 emit_insn (pat);
5431
5432 return_type = spu_builtin_types[d->parm[0]];
5433 if (d->parm[0] != SPU_BTI_VOID
5434 && GET_MODE (target) != TYPE_MODE (return_type))
5435 {
5436      /* target is the return value.  It should always have the mode of
5437 the builtin function prototype. */
5438 target = spu_force_reg (TYPE_MODE (return_type), target);
5439 }
5440
5441 return target;
5442}
5443
5444rtx
5445spu_expand_builtin (tree exp,
5446 rtx target,
5447 rtx subtarget ATTRIBUTE_UNUSED,
5448 enum machine_mode mode ATTRIBUTE_UNUSED,
5449 int ignore ATTRIBUTE_UNUSED)
5450{
5df189be 5451 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 5452 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 5453 struct spu_builtin_description *d;
5454
5455 if (fcode < NUM_SPU_BUILTINS)
5456 {
5457 d = &spu_builtins[fcode];
5458
5df189be 5459 return spu_expand_builtin_1 (d, exp, target);
6352eedf 5460 }
5461 abort ();
5462}
5463
e99f512d 5464/* Implement targetm.vectorize.builtin_mul_widen_even. */
5465static tree
5466spu_builtin_mul_widen_even (tree type)
5467{
e99f512d 5468 switch (TYPE_MODE (type))
5469 {
5470 case V8HImode:
5471 if (TYPE_UNSIGNED (type))
5472 return spu_builtins[SPU_MULE_0].fndecl;
5473 else
5474 return spu_builtins[SPU_MULE_1].fndecl;
5475 break;
5476 default:
5477 return NULL_TREE;
5478 }
5479}
5480
5481/* Implement targetm.vectorize.builtin_mul_widen_odd. */
5482static tree
5483spu_builtin_mul_widen_odd (tree type)
5484{
5485 switch (TYPE_MODE (type))
5486 {
5487 case V8HImode:
5488 if (TYPE_UNSIGNED (type))
5489 return spu_builtins[SPU_MULO_1].fndecl;
5490 else
5491 return spu_builtins[SPU_MULO_0].fndecl;
5492 break;
5493 default:
5494 return NULL_TREE;
5495 }
5496}
5497
a76866d3 5498/* Implement targetm.vectorize.builtin_mask_for_load. */
5499static tree
5500spu_builtin_mask_for_load (void)
5501{
5502 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5503 gcc_assert (d);
5504 return d->fndecl;
5505}
5df189be 5506
a28df51d 5507/* Implement targetm.vectorize.builtin_vectorization_cost. */
5508static int
5509spu_builtin_vectorization_cost (bool runtime_test)
5510{
5511  /* If the branch of the runtime test is taken, i.e., the vectorized
5512     version is skipped, this incurs a misprediction cost (because the
5513 vectorized version is expected to be the fall-through). So we subtract
becfaa62 5514 the latency of a mispredicted branch from the costs that are incurred
a28df51d 5515 when the vectorized version is executed. */
5516 if (runtime_test)
5517 return -19;
5518 else
5519 return 0;
5520}
5521
0e87db76 5522/* Return true iff a data reference of TYPE can reach vector alignment (16)
5523   after applying N iterations.  This routine does not determine
5524   how many iterations are required to reach the desired alignment. */
5525
5526static bool
a9f1838b 5527spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 5528{
5529 if (is_packed)
5530 return false;
5531
5532 /* All other types are naturally aligned. */
5533 return true;
5534}
5535
d52fd16a 5536/* Count the total number of instructions in each pipe and return the
5537 maximum, which is used as the Minimum Iteration Interval (MII)
5538 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
5539   -2 means the instruction can go in either pipe0 or pipe1. */
5540static int
5541spu_sms_res_mii (struct ddg *g)
5542{
5543 int i;
5544 unsigned t[4] = {0, 0, 0, 0};
5545
5546 for (i = 0; i < g->num_nodes; i++)
5547 {
5548 rtx insn = g->nodes[i].insn;
5549 int p = get_pipe (insn) + 2;
5550
5551 assert (p >= 0);
5552 assert (p < 4);
5553
5554 t[p]++;
5555 if (dump_file && INSN_P (insn))
5556 fprintf (dump_file, "i%d %s %d %d\n",
5557 INSN_UID (insn),
5558 insn_data[INSN_CODE(insn)].name,
5559 p, t[p]);
5560 }
5561 if (dump_file)
5562 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5563
5564 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5565}
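/* Worked example (illustrative only): with 6 dual-pipe instructions
   (get_pipe == -2), 3 pipe0 and 5 pipe1 instructions, the counts are
   t = {6, 0, 3, 5}, so the result is
   MAX ((6 + 3 + 5 + 1) / 2, MAX (3, 5)) = MAX (7, 5) = 7, i.e. at least
   seven cycles per iteration are needed on the two issue pipes.  */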
5566
5567
5df189be 5568void
5569spu_init_expanders (void)
5570{
5571  /* The hard frame pointer is only 128-bit aligned when
5572 * frame_pointer_needed is true. We don't know that until we're
5573 * expanding the prologue. */
5574 if (cfun)
5575 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 5576}
5577
5578static enum machine_mode
5579spu_libgcc_cmp_return_mode (void)
5580{
5581
5582/* For the SPU, word mode is TImode, so it is better to use SImode
5583 for compare returns. */
5584 return SImode;
5585}
5586
5587static enum machine_mode
5588spu_libgcc_shift_count_mode (void)
5589{
5590/* For the SPU, word mode is TImode, so it is better to use SImode
5591 for shift counts. */
5592 return SImode;
5593}