cf31d486 1/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
53#include "tree-gimple.h"
54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
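/* Valid value ranges for builtin immediate operands; each entry
   corresponds to the SPU_BTI_* code noted in its comment.  */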
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
78
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
5474166e 98static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
644459d0 100static rtx get_branch_target (rtx branch);
101static void insert_branch_hints (void);
102static void insert_nops (void);
103static void spu_machine_dependent_reorg (void);
104static int spu_sched_issue_rate (void);
105static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107static int get_pipe (rtx insn);
108static int spu_sched_adjust_priority (rtx insn, int pri);
109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static int spu_naked_function_p (tree func);
fb80456a 117static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
644459d0 119static tree spu_build_builtin_va_list (void);
8a58ed0a 120static void spu_va_start (tree, rtx);
644459d0 121static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
122 tree * post_p);
123static int regno_aligned_for_load (int regno);
124static int store_with_one_insn_p (rtx mem);
644459d0 125static int mem_is_padded_component_ref (rtx x);
126static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127static void spu_asm_globalize_label (FILE * file, const char *name);
128static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
129 int *total);
130static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131static void spu_init_libfuncs (void);
fb80456a 132static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 133static void fix_range (const char *);
69ced2d6 134static void spu_encode_section_info (tree, rtx, int);
e99f512d 135static tree spu_builtin_mul_widen_even (tree);
136static tree spu_builtin_mul_widen_odd (tree);
a76866d3 137static tree spu_builtin_mask_for_load (void);
a28df51d 138static int spu_builtin_vectorization_cost (bool);
a9f1838b 139static bool spu_vector_alignment_reachable (const_tree, bool);
d52fd16a 140static int spu_sms_res_mii (struct ddg *g);
644459d0 141
142extern const char *reg_names[];
143rtx spu_compare_op0, spu_compare_op1;
144
5474166e 145/* Which instruction set architecture to use. */
146int spu_arch;
 147/* Which CPU we are tuning for. */
148int spu_tune;
149
644459d0 150enum spu_immediate {
151 SPU_NONE,
152 SPU_IL,
153 SPU_ILA,
154 SPU_ILH,
155 SPU_ILHU,
156 SPU_ORI,
157 SPU_ORHI,
158 SPU_ORBI,
99369027 159 SPU_IOHL
644459d0 160};
dea01258 161enum immediate_class
162{
163 IC_POOL, /* constant pool */
164 IC_IL1, /* one il* instruction */
165 IC_IL2, /* both ilhu and iohl instructions */
166 IC_IL1s, /* one il* instruction */
167 IC_IL2s, /* both ilhu and iohl instructions */
168 IC_FSMBI, /* the fsmbi instruction */
169 IC_CPAT, /* one of the c*d instructions */
5df189be 170 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 171};
644459d0 172
173static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
174static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 175static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
176static enum immediate_class classify_immediate (rtx op,
177 enum machine_mode mode);
644459d0 178
1bd43494 179static enum machine_mode spu_unwind_word_mode (void);
180
ea32e033 181static enum machine_mode
182spu_libgcc_cmp_return_mode (void);
183
184static enum machine_mode
185spu_libgcc_shift_count_mode (void);
186
644459d0 187/* Built in types. */
188tree spu_builtin_types[SPU_BTI_MAX];
189\f
190/* TARGET overrides. */
191
192#undef TARGET_INIT_BUILTINS
193#define TARGET_INIT_BUILTINS spu_init_builtins
194
644459d0 195#undef TARGET_EXPAND_BUILTIN
196#define TARGET_EXPAND_BUILTIN spu_expand_builtin
197
1bd43494 198#undef TARGET_UNWIND_WORD_MODE
199#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 200
201/* The .8byte directive doesn't seem to work well for a 32 bit
202 architecture. */
203#undef TARGET_ASM_UNALIGNED_DI_OP
204#define TARGET_ASM_UNALIGNED_DI_OP NULL
205
206#undef TARGET_RTX_COSTS
207#define TARGET_RTX_COSTS spu_rtx_costs
208
209#undef TARGET_ADDRESS_COST
210#define TARGET_ADDRESS_COST hook_int_rtx_0
211
212#undef TARGET_SCHED_ISSUE_RATE
213#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
214
215#undef TARGET_SCHED_VARIABLE_ISSUE
216#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
217
218#undef TARGET_SCHED_ADJUST_PRIORITY
219#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
220
221#undef TARGET_SCHED_ADJUST_COST
222#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
223
224const struct attribute_spec spu_attribute_table[];
225#undef TARGET_ATTRIBUTE_TABLE
226#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
227
228#undef TARGET_ASM_INTEGER
229#define TARGET_ASM_INTEGER spu_assemble_integer
230
231#undef TARGET_SCALAR_MODE_SUPPORTED_P
232#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
233
234#undef TARGET_VECTOR_MODE_SUPPORTED_P
235#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
236
237#undef TARGET_FUNCTION_OK_FOR_SIBCALL
238#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
239
240#undef TARGET_ASM_GLOBALIZE_LABEL
241#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
242
243#undef TARGET_PASS_BY_REFERENCE
244#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
245
246#undef TARGET_MUST_PASS_IN_STACK
247#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
248
249#undef TARGET_BUILD_BUILTIN_VA_LIST
250#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
251
8a58ed0a 252#undef TARGET_EXPAND_BUILTIN_VA_START
253#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
254
644459d0 255#undef TARGET_SETUP_INCOMING_VARARGS
256#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
257
258#undef TARGET_MACHINE_DEPENDENT_REORG
259#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
260
261#undef TARGET_GIMPLIFY_VA_ARG_EXPR
262#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
263
264#undef TARGET_DEFAULT_TARGET_FLAGS
265#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
266
267#undef TARGET_INIT_LIBFUNCS
268#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
269
270#undef TARGET_RETURN_IN_MEMORY
271#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
272
69ced2d6 273#undef TARGET_ENCODE_SECTION_INFO
274#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
275
e99f512d 276#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
277#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
278
279#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
280#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
281
a76866d3 282#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
283#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
284
a28df51d 285#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
286#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
287
0e87db76 288#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
289#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
290
ea32e033 291#undef TARGET_LIBGCC_CMP_RETURN_MODE
292#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
293
294#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
295#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
296
d52fd16a 297#undef TARGET_SCHED_SMS_RES_MII
298#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
299
644459d0 300struct gcc_target targetm = TARGET_INITIALIZER;
301
5df189be 302void
303spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
304{
5df189be 305 /* Override some of the default param values. With so many registers
306 larger values are better for these params. */
307 MAX_PENDING_LIST_LENGTH = 128;
308
309 /* With so many registers this is better on by default. */
310 flag_rename_registers = 1;
311}
312
644459d0 313/* Sometimes certain combinations of command options do not make sense
314 on a particular target machine. You can define a macro
315 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
316 executed once just after all the command options have been parsed. */
317void
318spu_override_options (void)
319{
14d408d9 320 /* Small loops will be unpeeled at -O3. For SPU it is more important
321 to keep code small by default. */
322 if (!flag_unroll_loops && !flag_peel_loops
323 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
324 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
325
644459d0 326 flag_omit_frame_pointer = 1;
327
328 if (align_functions < 8)
329 align_functions = 8;
c7b91b14 330
331 if (spu_fixed_range_string)
332 fix_range (spu_fixed_range_string);
5474166e 333
334 /* Determine processor architectural level. */
335 if (spu_arch_string)
336 {
337 if (strcmp (&spu_arch_string[0], "cell") == 0)
338 spu_arch = PROCESSOR_CELL;
339 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
340 spu_arch = PROCESSOR_CELLEDP;
341 else
342 error ("Unknown architecture '%s'", &spu_arch_string[0]);
343 }
344
345 /* Determine processor to tune for. */
346 if (spu_tune_string)
347 {
348 if (strcmp (&spu_tune_string[0], "cell") == 0)
349 spu_tune = PROCESSOR_CELL;
350 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
351 spu_tune = PROCESSOR_CELLEDP;
352 else
353 error ("Unknown architecture '%s'", &spu_tune_string[0]);
354 }
644459d0 355}
356\f
357/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
358 struct attribute_spec.handler. */
359
360/* Table of machine attributes. */
361const struct attribute_spec spu_attribute_table[] =
362{
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
364 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
365 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
366 { NULL, 0, 0, false, false, false, NULL }
367};
368
369/* True if MODE is valid for the target. By "valid", we mean able to
370 be manipulated in non-trivial ways. In particular, this means all
371 the arithmetic is supported. */
372static bool
373spu_scalar_mode_supported_p (enum machine_mode mode)
374{
375 switch (mode)
376 {
377 case QImode:
378 case HImode:
379 case SImode:
380 case SFmode:
381 case DImode:
382 case TImode:
383 case DFmode:
384 return true;
385
386 default:
387 return false;
388 }
389}
390
391/* Similarly for vector modes. "Supported" here is less strict. At
392 least some operations are supported; need to check optabs or builtins
393 for further details. */
394static bool
395spu_vector_mode_supported_p (enum machine_mode mode)
396{
397 switch (mode)
398 {
399 case V16QImode:
400 case V8HImode:
401 case V4SImode:
402 case V2DImode:
403 case V4SFmode:
404 case V2DFmode:
405 return true;
406
407 default:
408 return false;
409 }
410}
411
412/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
413 least significant bytes of the outer mode. This function returns
 414   TRUE for the SUBREGs where this is correct. */
415int
416valid_subreg (rtx op)
417{
418 enum machine_mode om = GET_MODE (op);
419 enum machine_mode im = GET_MODE (SUBREG_REG (op));
420 return om != VOIDmode && im != VOIDmode
421 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
422 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
423}
424
 425/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 426 and adjust the start offset. */
644459d0 427static rtx
428adjust_operand (rtx op, HOST_WIDE_INT * start)
429{
430 enum machine_mode mode;
431 int op_size;
432 /* Strip any SUBREG */
433 if (GET_CODE (op) == SUBREG)
434 {
435 if (start)
436 *start -=
437 GET_MODE_BITSIZE (GET_MODE (op)) -
438 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
439 op = SUBREG_REG (op);
440 }
 441   /* If it is smaller than SI, ensure a SUBREG */
442 op_size = GET_MODE_BITSIZE (GET_MODE (op));
443 if (op_size < 32)
444 {
445 if (start)
446 *start += 32 - op_size;
447 op_size = 32;
448 }
449 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
450 mode = mode_for_size (op_size, MODE_INT, 0);
451 if (mode != GET_MODE (op))
452 op = gen_rtx_SUBREG (mode, op, 0);
453 return op;
454}
455
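/* Expand a bit-field extract.  ops[0] is the destination, ops[1] the
   source, ops[2] the field width in bits and ops[3] the start bit.
   The field is moved to the top with a left shift and then brought
   back down with a right shift, arithmetic or logical depending on
   UNSIGNEDP.  */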
456void
457spu_expand_extv (rtx ops[], int unsignedp)
458{
459 HOST_WIDE_INT width = INTVAL (ops[2]);
460 HOST_WIDE_INT start = INTVAL (ops[3]);
461 HOST_WIDE_INT src_size, dst_size;
462 enum machine_mode src_mode, dst_mode;
463 rtx dst = ops[0], src = ops[1];
464 rtx s;
465
466 dst = adjust_operand (ops[0], 0);
467 dst_mode = GET_MODE (dst);
468 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
469
644459d0 470 src = adjust_operand (src, &start);
471 src_mode = GET_MODE (src);
472 src_size = GET_MODE_BITSIZE (GET_MODE (src));
473
474 if (start > 0)
475 {
476 s = gen_reg_rtx (src_mode);
477 switch (src_mode)
478 {
479 case SImode:
480 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
481 break;
482 case DImode:
483 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
484 break;
485 case TImode:
486 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
487 break;
488 default:
489 abort ();
490 }
491 src = s;
492 }
493
494 if (width < src_size)
495 {
496 rtx pat;
497 int icode;
498 switch (src_mode)
499 {
500 case SImode:
501 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
502 break;
503 case DImode:
504 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
505 break;
506 case TImode:
507 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
508 break;
509 default:
510 abort ();
511 }
512 s = gen_reg_rtx (src_mode);
513 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
514 emit_insn (pat);
515 src = s;
516 }
517
518 convert_move (dst, src, unsignedp);
519}
520
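/* Expand a bit-field insert.  ops[0] is the field to set (REG or MEM),
   ops[1] the width in bits, ops[2] the start bit and ops[3] the value.
   A mask for the field is built and the new bits are merged in with
   selb.  MEM destinations are handled by rotating within the aligned
   quadword, updating a second quadword when the field may extend into
   the next one.  */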
521void
522spu_expand_insv (rtx ops[])
523{
524 HOST_WIDE_INT width = INTVAL (ops[1]);
525 HOST_WIDE_INT start = INTVAL (ops[2]);
526 HOST_WIDE_INT maskbits;
527 enum machine_mode dst_mode, src_mode;
528 rtx dst = ops[0], src = ops[3];
529 int dst_size, src_size;
530 rtx mask;
531 rtx shift_reg;
532 int shift;
533
534
535 if (GET_CODE (ops[0]) == MEM)
536 dst = gen_reg_rtx (TImode);
537 else
538 dst = adjust_operand (dst, &start);
539 dst_mode = GET_MODE (dst);
540 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
541
542 if (CONSTANT_P (src))
543 {
544 enum machine_mode m =
545 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
546 src = force_reg (m, convert_to_mode (m, src, 0));
547 }
548 src = adjust_operand (src, 0);
549 src_mode = GET_MODE (src);
550 src_size = GET_MODE_BITSIZE (GET_MODE (src));
551
552 mask = gen_reg_rtx (dst_mode);
553 shift_reg = gen_reg_rtx (dst_mode);
554 shift = dst_size - start - width;
555
556 /* It's not safe to use subreg here because the compiler assumes
557 that the SUBREG_REG is right justified in the SUBREG. */
558 convert_move (shift_reg, src, 1);
559
560 if (shift > 0)
561 {
562 switch (dst_mode)
563 {
564 case SImode:
565 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
566 break;
567 case DImode:
568 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
569 break;
570 case TImode:
571 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
572 break;
573 default:
574 abort ();
575 }
576 }
577 else if (shift < 0)
578 abort ();
579
580 switch (dst_size)
581 {
582 case 32:
583 maskbits = (-1ll << (32 - width - start));
584 if (start)
585 maskbits += (1ll << (32 - start));
586 emit_move_insn (mask, GEN_INT (maskbits));
587 break;
588 case 64:
589 maskbits = (-1ll << (64 - width - start));
590 if (start)
591 maskbits += (1ll << (64 - start));
592 emit_move_insn (mask, GEN_INT (maskbits));
593 break;
594 case 128:
595 {
596 unsigned char arr[16];
597 int i = start / 8;
598 memset (arr, 0, sizeof (arr));
599 arr[i] = 0xff >> (start & 7);
600 for (i++; i <= (start + width - 1) / 8; i++)
601 arr[i] = 0xff;
602 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
603 emit_move_insn (mask, array_to_constant (TImode, arr));
604 }
605 break;
606 default:
607 abort ();
608 }
609 if (GET_CODE (ops[0]) == MEM)
610 {
611 rtx aligned = gen_reg_rtx (SImode);
612 rtx low = gen_reg_rtx (SImode);
613 rtx addr = gen_reg_rtx (SImode);
614 rtx rotl = gen_reg_rtx (SImode);
615 rtx mask0 = gen_reg_rtx (TImode);
616 rtx mem;
617
618 emit_move_insn (addr, XEXP (ops[0], 0));
619 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
620 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
621 emit_insn (gen_negsi2 (rotl, low));
622 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
623 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
624 mem = change_address (ops[0], TImode, aligned);
625 set_mem_alias_set (mem, 0);
626 emit_move_insn (dst, mem);
627 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
628 emit_move_insn (mem, dst);
629 if (start + width > MEM_ALIGN (ops[0]))
630 {
631 rtx shl = gen_reg_rtx (SImode);
632 rtx mask1 = gen_reg_rtx (TImode);
633 rtx dst1 = gen_reg_rtx (TImode);
634 rtx mem1;
635 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
636 emit_insn (gen_shlqby_ti (mask1, mask, shl));
637 mem1 = adjust_address (mem, TImode, 16);
638 set_mem_alias_set (mem1, 0);
639 emit_move_insn (dst1, mem1);
640 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
641 emit_move_insn (mem1, dst1);
642 }
643 }
644 else
71cd778d 645 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 646}
647
648
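/* Expand a block move.  ops[2] is the byte count and ops[3] the
   alignment, both of which must be CONST_INTs.  Only small, 16-byte
   aligned copies are expanded inline: whole quadwords are moved with
   V16QI moves and any tail is merged into the destination with selb.
   Returns 1 when the move was expanded, 0 when the caller must handle
   it some other way.  */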
649int
650spu_expand_block_move (rtx ops[])
651{
652 HOST_WIDE_INT bytes, align, offset;
653 rtx src, dst, sreg, dreg, target;
654 int i;
655 if (GET_CODE (ops[2]) != CONST_INT
656 || GET_CODE (ops[3]) != CONST_INT
657 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
658 return 0;
659
660 bytes = INTVAL (ops[2]);
661 align = INTVAL (ops[3]);
662
663 if (bytes <= 0)
664 return 1;
665
666 dst = ops[0];
667 src = ops[1];
668
669 if (align == 16)
670 {
671 for (offset = 0; offset + 16 <= bytes; offset += 16)
672 {
673 dst = adjust_address (ops[0], V16QImode, offset);
674 src = adjust_address (ops[1], V16QImode, offset);
675 emit_move_insn (dst, src);
676 }
677 if (offset < bytes)
678 {
679 rtx mask;
680 unsigned char arr[16] = { 0 };
681 for (i = 0; i < bytes - offset; i++)
682 arr[i] = 0xff;
683 dst = adjust_address (ops[0], V16QImode, offset);
684 src = adjust_address (ops[1], V16QImode, offset);
685 mask = gen_reg_rtx (V16QImode);
686 sreg = gen_reg_rtx (V16QImode);
687 dreg = gen_reg_rtx (V16QImode);
688 target = gen_reg_rtx (V16QImode);
689 emit_move_insn (mask, array_to_constant (V16QImode, arr));
690 emit_move_insn (dreg, dst);
691 emit_move_insn (sreg, src);
692 emit_insn (gen_selb (target, dreg, sreg, mask));
693 emit_move_insn (dst, target);
694 }
695 return 1;
696 }
697 return 0;
698}
699
700enum spu_comp_code
701{ SPU_EQ, SPU_GT, SPU_GTU };
702
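/* Compare icodes indexed by operand mode (QI, HI, SI, DI, TI, SF, DF,
   V16QI, V8HI, V4SI, V4SF, V2DF) and by spu_comp_code.  A zero entry
   means no pattern exists for that combination.  */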
5474166e 703int spu_comp_icode[12][3] = {
704 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
705 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
706 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
707 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
708 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
709 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
710 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
711 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
712 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
713 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
714 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
715 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 716};
717
718/* Generate a compare for CODE. Return a brand-new rtx that represents
719 the result of the compare. GCC can figure this out too if we don't
 720   provide all variations of compares, but since GCC always wants to
 721   use WORD_MODE, we can generate better code in most cases if we do
 722   it ourselves.  */
723void
724spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
725{
726 int reverse_compare = 0;
727 int reverse_test = 0;
5d70b918 728 rtx compare_result, eq_result;
729 rtx comp_rtx, eq_rtx;
644459d0 730 rtx target = operands[0];
731 enum machine_mode comp_mode;
732 enum machine_mode op_mode;
5d70b918 733 enum spu_comp_code scode, eq_code, ior_code;
644459d0 734 int index;
5d70b918 735 int eq_test = 0;
644459d0 736
 737   /* When spu_compare_op1 is a CONST_INT, change (X >= C) to (X > C-1),
738 and so on, to keep the constant in operand 1. */
739 if (GET_CODE (spu_compare_op1) == CONST_INT)
740 {
741 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
742 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
743 switch (code)
744 {
745 case GE:
746 spu_compare_op1 = GEN_INT (val);
747 code = GT;
748 break;
749 case LT:
750 spu_compare_op1 = GEN_INT (val);
751 code = LE;
752 break;
753 case GEU:
754 spu_compare_op1 = GEN_INT (val);
755 code = GTU;
756 break;
757 case LTU:
758 spu_compare_op1 = GEN_INT (val);
759 code = LEU;
760 break;
761 default:
762 break;
763 }
764 }
765
5d70b918 766 comp_mode = SImode;
767 op_mode = GET_MODE (spu_compare_op0);
768
644459d0 769 switch (code)
770 {
771 case GE:
644459d0 772 scode = SPU_GT;
07027691 773 if (HONOR_NANS (op_mode))
5d70b918 774 {
775 reverse_compare = 0;
776 reverse_test = 0;
777 eq_test = 1;
778 eq_code = SPU_EQ;
779 }
780 else
781 {
782 reverse_compare = 1;
783 reverse_test = 1;
784 }
644459d0 785 break;
786 case LE:
644459d0 787 scode = SPU_GT;
07027691 788 if (HONOR_NANS (op_mode))
5d70b918 789 {
790 reverse_compare = 1;
791 reverse_test = 0;
792 eq_test = 1;
793 eq_code = SPU_EQ;
794 }
795 else
796 {
797 reverse_compare = 0;
798 reverse_test = 1;
799 }
644459d0 800 break;
801 case LT:
802 reverse_compare = 1;
803 reverse_test = 0;
804 scode = SPU_GT;
805 break;
806 case GEU:
807 reverse_compare = 1;
808 reverse_test = 1;
809 scode = SPU_GTU;
810 break;
811 case LEU:
812 reverse_compare = 0;
813 reverse_test = 1;
814 scode = SPU_GTU;
815 break;
816 case LTU:
817 reverse_compare = 1;
818 reverse_test = 0;
819 scode = SPU_GTU;
820 break;
821 case NE:
822 reverse_compare = 0;
823 reverse_test = 1;
824 scode = SPU_EQ;
825 break;
826
827 case EQ:
828 scode = SPU_EQ;
829 break;
830 case GT:
831 scode = SPU_GT;
832 break;
833 case GTU:
834 scode = SPU_GTU;
835 break;
836 default:
837 scode = SPU_EQ;
838 break;
839 }
840
644459d0 841 switch (op_mode)
842 {
843 case QImode:
844 index = 0;
845 comp_mode = QImode;
846 break;
847 case HImode:
848 index = 1;
849 comp_mode = HImode;
850 break;
851 case SImode:
852 index = 2;
853 break;
854 case DImode:
855 index = 3;
856 break;
857 case TImode:
858 index = 4;
859 break;
860 case SFmode:
861 index = 5;
862 break;
863 case DFmode:
864 index = 6;
865 break;
866 case V16QImode:
5474166e 867 index = 7;
868 comp_mode = op_mode;
869 break;
644459d0 870 case V8HImode:
5474166e 871 index = 8;
872 comp_mode = op_mode;
873 break;
644459d0 874 case V4SImode:
5474166e 875 index = 9;
876 comp_mode = op_mode;
877 break;
644459d0 878 case V4SFmode:
5474166e 879 index = 10;
880 comp_mode = V4SImode;
881 break;
644459d0 882 case V2DFmode:
5474166e 883 index = 11;
884 comp_mode = V2DImode;
644459d0 885 break;
5474166e 886 case V2DImode:
644459d0 887 default:
888 abort ();
889 }
890
07027691 891 if (GET_MODE (spu_compare_op1) == DFmode
892 && (scode != SPU_GT && scode != SPU_EQ))
893 abort ();
644459d0 894
895 if (is_set == 0 && spu_compare_op1 == const0_rtx
896 && (GET_MODE (spu_compare_op0) == SImode
897 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
898 {
899 /* Don't need to set a register with the result when we are
900 comparing against zero and branching. */
901 reverse_test = !reverse_test;
902 compare_result = spu_compare_op0;
903 }
904 else
905 {
906 compare_result = gen_reg_rtx (comp_mode);
907
908 if (reverse_compare)
909 {
910 rtx t = spu_compare_op1;
911 spu_compare_op1 = spu_compare_op0;
912 spu_compare_op0 = t;
913 }
914
915 if (spu_comp_icode[index][scode] == 0)
916 abort ();
917
918 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
919 (spu_compare_op0, op_mode))
920 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
921 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
922 (spu_compare_op1, op_mode))
923 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
924 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
925 spu_compare_op0,
926 spu_compare_op1);
927 if (comp_rtx == 0)
928 abort ();
929 emit_insn (comp_rtx);
930
5d70b918 931 if (eq_test)
932 {
933 eq_result = gen_reg_rtx (comp_mode);
934 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
935 spu_compare_op0,
936 spu_compare_op1);
937 if (eq_rtx == 0)
938 abort ();
939 emit_insn (eq_rtx);
940 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
941 gcc_assert (ior_code != CODE_FOR_nothing);
942 emit_insn (GEN_FCN (ior_code)
943 (compare_result, compare_result, eq_result));
944 }
644459d0 945 }
946
947 if (is_set == 0)
948 {
949 rtx bcomp;
950 rtx loc_ref;
951
952 /* We don't have branch on QI compare insns, so we convert the
953 QI compare result to a HI result. */
954 if (comp_mode == QImode)
955 {
956 rtx old_res = compare_result;
957 compare_result = gen_reg_rtx (HImode);
958 comp_mode = HImode;
959 emit_insn (gen_extendqihi2 (compare_result, old_res));
960 }
961
962 if (reverse_test)
963 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
964 else
965 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
966
967 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
968 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
969 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
970 loc_ref, pc_rtx)));
971 }
972 else if (is_set == 2)
973 {
974 int compare_size = GET_MODE_BITSIZE (comp_mode);
975 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
976 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
977 rtx select_mask;
978 rtx op_t = operands[2];
979 rtx op_f = operands[3];
980
981 /* The result of the comparison can be SI, HI or QI mode. Create a
982 mask based on that result. */
983 if (target_size > compare_size)
984 {
985 select_mask = gen_reg_rtx (mode);
986 emit_insn (gen_extend_compare (select_mask, compare_result));
987 }
988 else if (target_size < compare_size)
989 select_mask =
990 gen_rtx_SUBREG (mode, compare_result,
991 (compare_size - target_size) / BITS_PER_UNIT);
992 else if (comp_mode != mode)
993 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
994 else
995 select_mask = compare_result;
996
997 if (GET_MODE (target) != GET_MODE (op_t)
998 || GET_MODE (target) != GET_MODE (op_f))
999 abort ();
1000
1001 if (reverse_test)
1002 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1003 else
1004 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1005 }
1006 else
1007 {
1008 if (reverse_test)
1009 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1010 gen_rtx_NOT (comp_mode, compare_result)));
1011 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1012 emit_insn (gen_extendhisi2 (target, compare_result));
1013 else if (GET_MODE (target) == SImode
1014 && GET_MODE (compare_result) == QImode)
1015 emit_insn (gen_extend_compare (target, compare_result));
1016 else
1017 emit_move_insn (target, compare_result);
1018 }
1019}
1020
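/* Return the target bit pattern of the SFmode or DFmode CONST_DOUBLE X
   as a HOST_WIDE_INT; the DFmode image occupies all 64 bits.  */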
1021HOST_WIDE_INT
1022const_double_to_hwint (rtx x)
1023{
1024 HOST_WIDE_INT val;
1025 REAL_VALUE_TYPE rv;
1026 if (GET_MODE (x) == SFmode)
1027 {
1028 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1029 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1030 }
1031 else if (GET_MODE (x) == DFmode)
1032 {
1033 long l[2];
1034 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1035 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1036 val = l[0];
1037 val = (val << 32) | (l[1] & 0xffffffff);
1038 }
1039 else
1040 abort ();
1041 return val;
1042}
1043
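/* Inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE from the bit pattern V.  */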
1044rtx
1045hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1046{
1047 long tv[2];
1048 REAL_VALUE_TYPE rv;
1049 gcc_assert (mode == SFmode || mode == DFmode);
1050
1051 if (mode == SFmode)
1052 tv[0] = (v << 32) >> 32;
1053 else if (mode == DFmode)
1054 {
1055 tv[1] = (v << 32) >> 32;
1056 tv[0] = v >> 32;
1057 }
1058 real_from_target (&rv, tv, mode);
1059 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1060}
1061
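/* Output the assembly form of address ADDR to FILE.  An AND with -16,
   used to force quadword alignment, is stripped first; REG, REG+REG,
   REG+CONST and symbolic addresses are then printed in SPU syntax.  */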
1062void
1063print_operand_address (FILE * file, register rtx addr)
1064{
1065 rtx reg;
1066 rtx offset;
1067
e04cf423 1068 if (GET_CODE (addr) == AND
1069 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1070 && INTVAL (XEXP (addr, 1)) == -16)
1071 addr = XEXP (addr, 0);
1072
644459d0 1073 switch (GET_CODE (addr))
1074 {
1075 case REG:
1076 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1077 break;
1078
1079 case PLUS:
1080 reg = XEXP (addr, 0);
1081 offset = XEXP (addr, 1);
1082 if (GET_CODE (offset) == REG)
1083 {
1084 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1085 reg_names[REGNO (offset)]);
1086 }
1087 else if (GET_CODE (offset) == CONST_INT)
1088 {
1089 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1090 INTVAL (offset), reg_names[REGNO (reg)]);
1091 }
1092 else
1093 abort ();
1094 break;
1095
1096 case CONST:
1097 case LABEL_REF:
1098 case SYMBOL_REF:
1099 case CONST_INT:
1100 output_addr_const (file, addr);
1101 break;
1102
1103 default:
1104 debug_rtx (addr);
1105 abort ();
1106 }
1107}
1108
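/* Output operand X to FILE.  CODE selects an instruction specific
   form: most letters print part of an immediate, either an opcode
   suffix or a suitably truncated value; 'b', 'i' and 'p' print branch,
   call and load/store modifiers; 0 prints the operand normally.  */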
1109void
1110print_operand (FILE * file, rtx x, int code)
1111{
1112 enum machine_mode mode = GET_MODE (x);
1113 HOST_WIDE_INT val;
1114 unsigned char arr[16];
1115 int xcode = GET_CODE (x);
dea01258 1116 int i, info;
644459d0 1117 if (GET_MODE (x) == VOIDmode)
1118 switch (code)
1119 {
644459d0 1120 case 'L': /* 128 bits, signed */
1121 case 'm': /* 128 bits, signed */
1122 case 'T': /* 128 bits, signed */
1123 case 't': /* 128 bits, signed */
1124 mode = TImode;
1125 break;
644459d0 1126 case 'K': /* 64 bits, signed */
1127 case 'k': /* 64 bits, signed */
1128 case 'D': /* 64 bits, signed */
1129 case 'd': /* 64 bits, signed */
1130 mode = DImode;
1131 break;
644459d0 1132 case 'J': /* 32 bits, signed */
1133 case 'j': /* 32 bits, signed */
1134 case 's': /* 32 bits, signed */
1135 case 'S': /* 32 bits, signed */
1136 mode = SImode;
1137 break;
1138 }
1139 switch (code)
1140 {
1141
1142 case 'j': /* 32 bits, signed */
1143 case 'k': /* 64 bits, signed */
1144 case 'm': /* 128 bits, signed */
1145 if (xcode == CONST_INT
1146 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1147 {
1148 gcc_assert (logical_immediate_p (x, mode));
1149 constant_to_array (mode, x, arr);
1150 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1151 val = trunc_int_for_mode (val, SImode);
1152 switch (which_logical_immediate (val))
1153 {
1154 case SPU_ORI:
1155 break;
1156 case SPU_ORHI:
1157 fprintf (file, "h");
1158 break;
1159 case SPU_ORBI:
1160 fprintf (file, "b");
1161 break;
1162 default:
1163 gcc_unreachable();
1164 }
1165 }
1166 else
1167 gcc_unreachable();
1168 return;
1169
1170 case 'J': /* 32 bits, signed */
1171 case 'K': /* 64 bits, signed */
1172 case 'L': /* 128 bits, signed */
1173 if (xcode == CONST_INT
1174 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1175 {
1176 gcc_assert (logical_immediate_p (x, mode)
1177 || iohl_immediate_p (x, mode));
1178 constant_to_array (mode, x, arr);
1179 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1180 val = trunc_int_for_mode (val, SImode);
1181 switch (which_logical_immediate (val))
1182 {
1183 case SPU_ORI:
1184 case SPU_IOHL:
1185 break;
1186 case SPU_ORHI:
1187 val = trunc_int_for_mode (val, HImode);
1188 break;
1189 case SPU_ORBI:
1190 val = trunc_int_for_mode (val, QImode);
1191 break;
1192 default:
1193 gcc_unreachable();
1194 }
1195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1196 }
1197 else
1198 gcc_unreachable();
1199 return;
1200
1201 case 't': /* 128 bits, signed */
1202 case 'd': /* 64 bits, signed */
1203 case 's': /* 32 bits, signed */
dea01258 1204 if (CONSTANT_P (x))
644459d0 1205 {
dea01258 1206 enum immediate_class c = classify_immediate (x, mode);
1207 switch (c)
1208 {
1209 case IC_IL1:
1210 constant_to_array (mode, x, arr);
1211 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1212 val = trunc_int_for_mode (val, SImode);
1213 switch (which_immediate_load (val))
1214 {
1215 case SPU_IL:
1216 break;
1217 case SPU_ILA:
1218 fprintf (file, "a");
1219 break;
1220 case SPU_ILH:
1221 fprintf (file, "h");
1222 break;
1223 case SPU_ILHU:
1224 fprintf (file, "hu");
1225 break;
1226 default:
1227 gcc_unreachable ();
1228 }
1229 break;
1230 case IC_CPAT:
1231 constant_to_array (mode, x, arr);
1232 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1233 if (info == 1)
1234 fprintf (file, "b");
1235 else if (info == 2)
1236 fprintf (file, "h");
1237 else if (info == 4)
1238 fprintf (file, "w");
1239 else if (info == 8)
1240 fprintf (file, "d");
1241 break;
1242 case IC_IL1s:
1243 if (xcode == CONST_VECTOR)
1244 {
1245 x = CONST_VECTOR_ELT (x, 0);
1246 xcode = GET_CODE (x);
1247 }
1248 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1249 fprintf (file, "a");
1250 else if (xcode == HIGH)
1251 fprintf (file, "hu");
1252 break;
1253 case IC_FSMBI:
5df189be 1254 case IC_FSMBI2:
dea01258 1255 case IC_IL2:
1256 case IC_IL2s:
1257 case IC_POOL:
1258 abort ();
1259 }
644459d0 1260 }
644459d0 1261 else
1262 gcc_unreachable ();
1263 return;
1264
1265 case 'T': /* 128 bits, signed */
1266 case 'D': /* 64 bits, signed */
1267 case 'S': /* 32 bits, signed */
dea01258 1268 if (CONSTANT_P (x))
644459d0 1269 {
dea01258 1270 enum immediate_class c = classify_immediate (x, mode);
1271 switch (c)
644459d0 1272 {
dea01258 1273 case IC_IL1:
1274 constant_to_array (mode, x, arr);
1275 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1276 val = trunc_int_for_mode (val, SImode);
1277 switch (which_immediate_load (val))
1278 {
1279 case SPU_IL:
1280 case SPU_ILA:
1281 break;
1282 case SPU_ILH:
1283 case SPU_ILHU:
1284 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1285 break;
1286 default:
1287 gcc_unreachable ();
1288 }
1289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1290 break;
1291 case IC_FSMBI:
1292 constant_to_array (mode, x, arr);
1293 val = 0;
1294 for (i = 0; i < 16; i++)
1295 {
1296 val <<= 1;
1297 val |= arr[i] & 1;
1298 }
1299 print_operand (file, GEN_INT (val), 0);
1300 break;
1301 case IC_CPAT:
1302 constant_to_array (mode, x, arr);
1303 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1305 break;
dea01258 1306 case IC_IL1s:
dea01258 1307 if (xcode == HIGH)
5df189be 1308 x = XEXP (x, 0);
1309 if (GET_CODE (x) == CONST_VECTOR)
1310 x = CONST_VECTOR_ELT (x, 0);
1311 output_addr_const (file, x);
1312 if (xcode == HIGH)
1313 fprintf (file, "@h");
644459d0 1314 break;
dea01258 1315 case IC_IL2:
1316 case IC_IL2s:
5df189be 1317 case IC_FSMBI2:
dea01258 1318 case IC_POOL:
1319 abort ();
644459d0 1320 }
c8befdb9 1321 }
644459d0 1322 else
1323 gcc_unreachable ();
1324 return;
1325
644459d0 1326 case 'C':
1327 if (xcode == CONST_INT)
1328 {
1329 /* Only 4 least significant bits are relevant for generate
1330 control word instructions. */
1331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1332 return;
1333 }
1334 break;
1335
1336 case 'M': /* print code for c*d */
1337 if (GET_CODE (x) == CONST_INT)
1338 switch (INTVAL (x))
1339 {
1340 case 1:
1341 fprintf (file, "b");
1342 break;
1343 case 2:
1344 fprintf (file, "h");
1345 break;
1346 case 4:
1347 fprintf (file, "w");
1348 break;
1349 case 8:
1350 fprintf (file, "d");
1351 break;
1352 default:
1353 gcc_unreachable();
1354 }
1355 else
1356 gcc_unreachable();
1357 return;
1358
1359 case 'N': /* Negate the operand */
1360 if (xcode == CONST_INT)
1361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1362 else if (xcode == CONST_VECTOR)
1363 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1364 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1365 return;
1366
1367 case 'I': /* enable/disable interrupts */
1368 if (xcode == CONST_INT)
1369 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1370 return;
1371
1372 case 'b': /* branch modifiers */
1373 if (xcode == REG)
1374 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1375 else if (COMPARISON_P (x))
1376 fprintf (file, "%s", xcode == NE ? "n" : "");
1377 return;
1378
1379 case 'i': /* indirect call */
1380 if (xcode == MEM)
1381 {
1382 if (GET_CODE (XEXP (x, 0)) == REG)
1383 /* Used in indirect function calls. */
1384 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1385 else
1386 output_address (XEXP (x, 0));
1387 }
1388 return;
1389
1390 case 'p': /* load/store */
1391 if (xcode == MEM)
1392 {
1393 x = XEXP (x, 0);
1394 xcode = GET_CODE (x);
1395 }
e04cf423 1396 if (xcode == AND)
1397 {
1398 x = XEXP (x, 0);
1399 xcode = GET_CODE (x);
1400 }
644459d0 1401 if (xcode == REG)
1402 fprintf (file, "d");
1403 else if (xcode == CONST_INT)
1404 fprintf (file, "a");
1405 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1406 fprintf (file, "r");
1407 else if (xcode == PLUS || xcode == LO_SUM)
1408 {
1409 if (GET_CODE (XEXP (x, 1)) == REG)
1410 fprintf (file, "x");
1411 else
1412 fprintf (file, "d");
1413 }
1414 return;
1415
5df189be 1416 case 'e':
1417 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1418 val &= 0x7;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1421
1422 case 'f':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val &= 0x1f;
1425 output_addr_const (file, GEN_INT (val));
1426 return;
1427
1428 case 'g':
1429 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1430 val &= 0x3f;
1431 output_addr_const (file, GEN_INT (val));
1432 return;
1433
1434 case 'h':
1435 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1436 val = (val >> 3) & 0x1f;
1437 output_addr_const (file, GEN_INT (val));
1438 return;
1439
1440 case 'E':
1441 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1442 val = -val;
1443 val &= 0x7;
1444 output_addr_const (file, GEN_INT (val));
1445 return;
1446
1447 case 'F':
1448 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1449 val = -val;
1450 val &= 0x1f;
1451 output_addr_const (file, GEN_INT (val));
1452 return;
1453
1454 case 'G':
1455 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1456 val = -val;
1457 val &= 0x3f;
1458 output_addr_const (file, GEN_INT (val));
1459 return;
1460
1461 case 'H':
1462 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1463 val = -(val & -8ll);
1464 val = (val >> 3) & 0x1f;
1465 output_addr_const (file, GEN_INT (val));
1466 return;
1467
644459d0 1468 case 0:
1469 if (xcode == REG)
1470 fprintf (file, "%s", reg_names[REGNO (x)]);
1471 else if (xcode == MEM)
1472 output_address (XEXP (x, 0));
1473 else if (xcode == CONST_VECTOR)
dea01258 1474 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1475 else
1476 output_addr_const (file, x);
1477 return;
1478
f6a0d06f 1479 /* unused letters
5df189be 1480 o qr uvw yz
1481 AB OPQR UVWXYZ */
644459d0 1482 default:
1483 output_operand_lossage ("invalid %%xn code");
1484 }
1485 gcc_unreachable ();
1486}
1487
1488extern char call_used_regs[];
644459d0 1489
1490/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1491 caller saved register. For leaf functions it is more efficient to
1492 use a volatile register because we won't need to save and restore the
1493 pic register. This routine is only valid after register allocation
1494 is completed, so we can pick an unused register. */
1495static rtx
1496get_pic_reg (void)
1497{
1498 rtx pic_reg = pic_offset_table_rtx;
1499 if (!reload_completed && !reload_in_progress)
1500 abort ();
1501 return pic_reg;
1502}
1503
5df189be 1504/* Split constant addresses to handle cases that are too large.
1505 Add in the pic register when in PIC mode.
1506 Split immediates that require more than 1 instruction. */
dea01258 1507int
1508spu_split_immediate (rtx * ops)
c8befdb9 1509{
dea01258 1510 enum machine_mode mode = GET_MODE (ops[0]);
1511 enum immediate_class c = classify_immediate (ops[1], mode);
1512
1513 switch (c)
c8befdb9 1514 {
dea01258 1515 case IC_IL2:
1516 {
1517 unsigned char arrhi[16];
1518 unsigned char arrlo[16];
1519 rtx to, hi, lo;
1520 int i;
1521 constant_to_array (mode, ops[1], arrhi);
e1ba4a27 1522 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
dea01258 1523 for (i = 0; i < 16; i += 4)
1524 {
1525 arrlo[i + 2] = arrhi[i + 2];
1526 arrlo[i + 3] = arrhi[i + 3];
1527 arrlo[i + 0] = arrlo[i + 1] = 0;
1528 arrhi[i + 2] = arrhi[i + 3] = 0;
1529 }
1530 hi = array_to_constant (mode, arrhi);
1531 lo = array_to_constant (mode, arrlo);
1532 emit_move_insn (to, hi);
1533 emit_insn (gen_rtx_SET
1534 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1535 return 1;
1536 }
5df189be 1537 case IC_FSMBI2:
1538 {
1539 unsigned char arr_fsmbi[16];
1540 unsigned char arr_andbi[16];
1541 rtx to, reg_fsmbi, reg_and;
1542 int i;
1543 enum machine_mode imode = mode;
1544 /* We need to do reals as ints because the constant used in the
1545 * AND might not be a legitimate real constant. */
1546 imode = int_mode_for_mode (mode);
1547 constant_to_array (mode, ops[1], arr_fsmbi);
1548 if (imode != mode)
1549 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1550 else
1551 to = ops[0];
1552 for (i = 0; i < 16; i++)
1553 if (arr_fsmbi[i] != 0)
1554 {
1555 arr_andbi[0] = arr_fsmbi[i];
1556 arr_fsmbi[i] = 0xff;
1557 }
1558 for (i = 1; i < 16; i++)
1559 arr_andbi[i] = arr_andbi[0];
1560 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1561 reg_and = array_to_constant (imode, arr_andbi);
1562 emit_move_insn (to, reg_fsmbi);
1563 emit_insn (gen_rtx_SET
1564 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1565 return 1;
1566 }
dea01258 1567 case IC_POOL:
1568 if (reload_in_progress || reload_completed)
1569 {
1570 rtx mem = force_const_mem (mode, ops[1]);
1571 if (TARGET_LARGE_MEM)
1572 {
1573 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1574 emit_move_insn (addr, XEXP (mem, 0));
1575 mem = replace_equiv_address (mem, addr);
1576 }
1577 emit_move_insn (ops[0], mem);
1578 return 1;
1579 }
1580 break;
1581 case IC_IL1s:
1582 case IC_IL2s:
1583 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1584 {
1585 if (c == IC_IL2s)
1586 {
5df189be 1587 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1588 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1589 }
1590 else if (flag_pic)
1591 emit_insn (gen_pic (ops[0], ops[1]));
1592 if (flag_pic)
1593 {
1594 rtx pic_reg = get_pic_reg ();
1595 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1596 crtl->uses_pic_offset_table = 1;
dea01258 1597 }
1598 return flag_pic || c == IC_IL2s;
1599 }
1600 break;
1601 case IC_IL1:
1602 case IC_FSMBI:
1603 case IC_CPAT:
1604 break;
c8befdb9 1605 }
dea01258 1606 return 0;
c8befdb9 1607}
1608
644459d0 1609/* SAVING is TRUE when we are generating the actual load and store
1610 instructions for REGNO. When determining the size of the stack
 1611   needed for saving registers we must allocate enough space for the
1612 worst case, because we don't always have the information early enough
1613 to not allocate it. But we can at least eliminate the actual loads
1614 and stores during the prologue/epilogue. */
1615static int
1616need_to_save_reg (int regno, int saving)
1617{
3072d30e 1618 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1619 return 1;
1620 if (flag_pic
1621 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1622 && (!saving || crtl->uses_pic_offset_table)
644459d0 1623 && (!saving
3072d30e 1624 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1625 return 1;
1626 return 0;
1627}
1628
1629/* This function is only correct starting with local register
1630 allocation */
1631int
1632spu_saved_regs_size (void)
1633{
1634 int reg_save_size = 0;
1635 int regno;
1636
1637 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1638 if (need_to_save_reg (regno, 0))
1639 reg_save_size += 0x10;
1640 return reg_save_size;
1641}
1642
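/* Emit a store of register REGNO, as a V4SI value, to the stack slot
   at ADDR + OFFSET.  frame_emit_load below is the matching load.  */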
1643static rtx
1644frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1645{
1646 rtx reg = gen_rtx_REG (V4SImode, regno);
1647 rtx mem =
1648 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1649 return emit_insn (gen_movv4si (mem, reg));
1650}
1651
1652static rtx
1653frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1654{
1655 rtx reg = gen_rtx_REG (V4SImode, regno);
1656 rtx mem =
1657 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1658 return emit_insn (gen_movv4si (reg, mem));
1659}
1660
1661/* This happens after reload, so we need to expand it. */
1662static rtx
1663frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1664{
1665 rtx insn;
1666 if (satisfies_constraint_K (GEN_INT (imm)))
1667 {
1668 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1669 }
1670 else
1671 {
3072d30e 1672 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1673 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1674 if (REGNO (src) == REGNO (scratch))
1675 abort ();
1676 }
644459d0 1677 return insn;
1678}
1679
1680/* Return nonzero if this function is known to have a null epilogue. */
1681
1682int
1683direct_return (void)
1684{
1685 if (reload_completed)
1686 {
1687 if (cfun->static_chain_decl == 0
1688 && (spu_saved_regs_size ()
1689 + get_frame_size ()
abe32cce 1690 + crtl->outgoing_args_size
1691 + crtl->args.pretend_args_size == 0)
644459d0 1692 && current_function_is_leaf)
1693 return 1;
1694 }
1695 return 0;
1696}
1697
1698/*
1699 The stack frame looks like this:
1700 +-------------+
1701 | incoming |
1702 AP | args |
1703 +-------------+
1704 | $lr save |
1705 +-------------+
1706 prev SP | back chain |
1707 +-------------+
1708 | var args |
abe32cce 1709 | reg save | crtl->args.pretend_args_size bytes
644459d0 1710 +-------------+
1711 | ... |
1712 | saved regs | spu_saved_regs_size() bytes
1713 +-------------+
1714 | ... |
1715 FP | vars | get_frame_size() bytes
1716 +-------------+
1717 | ... |
1718 | outgoing |
abe32cce 1719 | args | crtl->outgoing_args_size bytes
644459d0 1720 +-------------+
1721 | $lr of next |
1722 | frame |
1723 +-------------+
1724 SP | back chain |
1725 +-------------+
1726
1727*/
1728void
1729spu_expand_prologue (void)
1730{
1731 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1732 HOST_WIDE_INT total_size;
1733 HOST_WIDE_INT saved_regs_size;
1734 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1735 rtx scratch_reg_0, scratch_reg_1;
1736 rtx insn, real;
1737
1738 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1739 the "toplevel" insn chain. */
1740 emit_note (NOTE_INSN_DELETED);
1741
1742 if (flag_pic && optimize == 0)
18d50ae6 1743 crtl->uses_pic_offset_table = 1;
644459d0 1744
1745 if (spu_naked_function_p (current_function_decl))
1746 return;
1747
1748 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1749 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1750
1751 saved_regs_size = spu_saved_regs_size ();
1752 total_size = size + saved_regs_size
abe32cce 1753 + crtl->outgoing_args_size
1754 + crtl->args.pretend_args_size;
644459d0 1755
1756 if (!current_function_is_leaf
18d50ae6 1757 || cfun->calls_alloca || total_size > 0)
644459d0 1758 total_size += STACK_POINTER_OFFSET;
1759
1760 /* Save this first because code after this might use the link
1761 register as a scratch register. */
1762 if (!current_function_is_leaf)
1763 {
1764 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1765 RTX_FRAME_RELATED_P (insn) = 1;
1766 }
1767
1768 if (total_size > 0)
1769 {
abe32cce 1770 offset = -crtl->args.pretend_args_size;
644459d0 1771 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1772 if (need_to_save_reg (regno, 1))
1773 {
1774 offset -= 16;
1775 insn = frame_emit_store (regno, sp_reg, offset);
1776 RTX_FRAME_RELATED_P (insn) = 1;
1777 }
1778 }
1779
18d50ae6 1780 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1781 {
1782 rtx pic_reg = get_pic_reg ();
1783 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1784 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1785 }
1786
1787 if (total_size > 0)
1788 {
1789 if (flag_stack_check)
1790 {
d819917f 1791 /* We compare against total_size-1 because
644459d0 1792 ($sp >= total_size) <=> ($sp > total_size-1) */
1793 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1794 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1795 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1796 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1797 {
1798 emit_move_insn (scratch_v4si, size_v4si);
1799 size_v4si = scratch_v4si;
1800 }
1801 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1802 emit_insn (gen_vec_extractv4si
1803 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1804 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1805 }
1806
1807 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1808 the value of the previous $sp because we save it as the back
1809 chain. */
1810 if (total_size <= 2000)
1811 {
1812 /* In this case we save the back chain first. */
1813 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1814 insn =
1815 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1816 }
1817 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1818 {
1819 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1820 insn =
1821 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1822 }
1823 else
1824 {
1825 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1826 insn =
1827 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1828 }
1829 RTX_FRAME_RELATED_P (insn) = 1;
1830 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1831 REG_NOTES (insn) =
1832 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1833
1834 if (total_size > 2000)
1835 {
1836 /* Save the back chain ptr */
1837 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1838 }
1839
1840 if (frame_pointer_needed)
1841 {
1842 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1843 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1844 + crtl->outgoing_args_size;
644459d0 1845 /* Set the new frame_pointer */
d8dfeb55 1846 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1847 RTX_FRAME_RELATED_P (insn) = 1;
1848 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1849 REG_NOTES (insn) =
1850 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1851 real, REG_NOTES (insn));
5df189be 1852 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1853 }
1854 }
1855
1856 emit_note (NOTE_INSN_DELETED);
1857}
1858
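/* Emit the epilogue: restore the saved registers and the stack
   pointer, reload $lr, and emit the return jump unless SIBCALL_P.  */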
1859void
1860spu_expand_epilogue (bool sibcall_p)
1861{
1862 int size = get_frame_size (), offset, regno;
1863 HOST_WIDE_INT saved_regs_size, total_size;
1864 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1865 rtx jump, scratch_reg_0;
1866
1867 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1868 the "toplevel" insn chain. */
1869 emit_note (NOTE_INSN_DELETED);
1870
1871 if (spu_naked_function_p (current_function_decl))
1872 return;
1873
1874 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1875
1876 saved_regs_size = spu_saved_regs_size ();
1877 total_size = size + saved_regs_size
abe32cce 1878 + crtl->outgoing_args_size
1879 + crtl->args.pretend_args_size;
644459d0 1880
1881 if (!current_function_is_leaf
18d50ae6 1882 || cfun->calls_alloca || total_size > 0)
644459d0 1883 total_size += STACK_POINTER_OFFSET;
1884
1885 if (total_size > 0)
1886 {
18d50ae6 1887 if (cfun->calls_alloca)
644459d0 1888 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1889 else
1890 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1891
1892
1893 if (saved_regs_size > 0)
1894 {
abe32cce 1895 offset = -crtl->args.pretend_args_size;
644459d0 1896 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1897 if (need_to_save_reg (regno, 1))
1898 {
1899 offset -= 0x10;
1900 frame_emit_load (regno, sp_reg, offset);
1901 }
1902 }
1903 }
1904
1905 if (!current_function_is_leaf)
1906 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1907
1908 if (!sibcall_p)
1909 {
18b42941 1910 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1911 jump = emit_jump_insn (gen__return ());
1912 emit_barrier_after (jump);
1913 }
1914
1915 emit_note (NOTE_INSN_DELETED);
1916}
1917
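/* Return the return address of the current frame (COUNT must be 0),
   taken from the link register via a hard-reg initial value.  */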
1918rtx
1919spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1920{
1921 if (count != 0)
1922 return 0;
1923 /* This is inefficient because it ends up copying to a save-register
1924 which then gets saved even though $lr has already been saved. But
1925 it does generate better code for leaf functions and we don't need
1926 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1927 used for __builtin_return_address anyway, so maybe we don't care if
1928 it's inefficient. */
1929 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1930}
1931\f
1932
1933/* Given VAL, generate a constant appropriate for MODE.
1934 If MODE is a vector mode, every element will be VAL.
1935 For TImode, VAL will be zero extended to 128 bits. */
1936rtx
1937spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1938{
1939 rtx inner;
1940 rtvec v;
1941 int units, i;
1942
1943 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1944 || GET_MODE_CLASS (mode) == MODE_FLOAT
1945 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1946 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1947
1948 if (GET_MODE_CLASS (mode) == MODE_INT)
1949 return immed_double_const (val, 0, mode);
1950
1951 /* val is the bit representation of the float */
1952 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1953 return hwint_to_const_double (mode, val);
1954
1955 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1956 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1957 else
1958 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1959
1960 units = GET_MODE_NUNITS (mode);
1961
1962 v = rtvec_alloc (units);
1963
1964 for (i = 0; i < units; ++i)
1965 RTVEC_ELT (v, i) = inner;
1966
1967 return gen_rtx_CONST_VECTOR (mode, v);
1968}
1969\f
1970/* branch hint stuff */
1971
1972/* The hardware requires 8 insns between a hint and the branch it
   1973   affects.  This variable describes how many rtl instructions the
   1974   compiler needs to see before inserting a hint.  (FIXME: We should
   1975   accept fewer and insert nops to enforce it because hinting is always
   1976   profitable for performance, but we do need to be careful of code
   1977   size.)  */
1978int spu_hint_dist = (8 * 4);
1979
5474166e 1980/* Create a MODE vector constant from 4 ints. */
1981rtx
1982spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1983{
1984 unsigned char arr[16];
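  /* Pack A, B, C and D as four big-endian 32-bit words into a
     16-byte array.  */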
1985 arr[0] = (a >> 24) & 0xff;
1986 arr[1] = (a >> 16) & 0xff;
1987 arr[2] = (a >> 8) & 0xff;
1988 arr[3] = (a >> 0) & 0xff;
1989 arr[4] = (b >> 24) & 0xff;
1990 arr[5] = (b >> 16) & 0xff;
1991 arr[6] = (b >> 8) & 0xff;
1992 arr[7] = (b >> 0) & 0xff;
1993 arr[8] = (c >> 24) & 0xff;
1994 arr[9] = (c >> 16) & 0xff;
1995 arr[10] = (c >> 8) & 0xff;
1996 arr[11] = (c >> 0) & 0xff;
1997 arr[12] = (d >> 24) & 0xff;
1998 arr[13] = (d >> 16) & 0xff;
1999 arr[14] = (d >> 8) & 0xff;
2000 arr[15] = (d >> 0) & 0xff;
2001 return array_to_constant(mode, arr);
2002}
2003
644459d0 2004/* An array of these is used to propagate hints to predecessor blocks. */
2005struct spu_bb_info
2006{
fa7637bd 2007 rtx prop_jump; /* propagated from another block */
2008 basic_block bb; /* the original block. */
644459d0 2009};
2010
2011/* The special $hbr register is used to prevent the insn scheduler from
2012 moving hbr insns across instructions which invalidate them. It
2013 should only be used in a clobber, and this function searches for
2014 insns which clobber it. */
2015static bool
2016insn_clobbers_hbr (rtx insn)
2017{
2018 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2019 {
2020 rtx parallel = PATTERN (insn);
2021 rtx clobber;
2022 int j;
2023 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2024 {
2025 clobber = XVECEXP (parallel, 0, j);
2026 if (GET_CODE (clobber) == CLOBBER
2027 && GET_CODE (XEXP (clobber, 0)) == REG
2028 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2029 return 1;
2030 }
2031 }
2032 return 0;
2033}
2034
2035static void
2036spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2037{
2038 rtx branch_label;
2039 rtx hint, insn, prev, next;
2040
2041 if (before == 0 || branch == 0 || target == 0)
2042 return;
2043
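  /* Do not hint branches that are too far away; the hbr instruction's
     reach is limited and a distant hint is unlikely to help.  */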
2044 if (distance > 600)
2045 return;
2046
2047
2048 branch_label = gen_label_rtx ();
2049 LABEL_NUSES (branch_label)++;
2050 LABEL_PRESERVE_P (branch_label) = 1;
2051 insn = emit_label_before (branch_label, branch);
2052 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2053
2054 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2055 the current insn is pipe0, dual issue with it. */
2056 prev = prev_active_insn (before);
2057 if (prev && get_pipe (prev) == 0)
2058 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2059 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2060 {
2061 next = next_active_insn (before);
2062 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2063 if (next)
2064 PUT_MODE (next, TImode);
2065 }
2066 else
2067 {
2068 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2069 PUT_MODE (hint, TImode);
2070 }
2071 recog_memoized (hint);
2072}
2073
2074/* Returns 0 if we don't want a hint for this branch. Otherwise return
2075 the rtx for the branch target. */
2076static rtx
2077get_branch_target (rtx branch)
2078{
2079 if (GET_CODE (branch) == JUMP_INSN)
2080 {
2081 rtx set, src;
2082
2083 /* Return statements */
2084 if (GET_CODE (PATTERN (branch)) == RETURN)
2085 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2086
2087 /* jump table */
2088 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2089 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2090 return 0;
2091
2092 set = single_set (branch);
2093 src = SET_SRC (set);
2094 if (GET_CODE (SET_DEST (set)) != PC)
2095 abort ();
2096
2097 if (GET_CODE (src) == IF_THEN_ELSE)
2098 {
2099 rtx lab = 0;
2100 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2101 if (note)
2102 {
2103 /* If the more probable case is not a fall through, then
2104 try a branch hint. */
2105 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2106 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2107 && GET_CODE (XEXP (src, 1)) != PC)
2108 lab = XEXP (src, 1);
2109 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2110 && GET_CODE (XEXP (src, 2)) != PC)
2111 lab = XEXP (src, 2);
2112 }
2113 if (lab)
2114 {
2115 if (GET_CODE (lab) == RETURN)
2116 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2117 return lab;
2118 }
2119 return 0;
2120 }
2121
2122 return src;
2123 }
2124 else if (GET_CODE (branch) == CALL_INSN)
2125 {
2126 rtx call;
2127 /* All of our call patterns are in a PARALLEL and the CALL is
2128 the first pattern in the PARALLEL. */
2129 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2130 abort ();
2131 call = XVECEXP (PATTERN (branch), 0, 0);
2132 if (GET_CODE (call) == SET)
2133 call = SET_SRC (call);
2134 if (GET_CODE (call) != CALL)
2135 abort ();
2136 return XEXP (XEXP (call, 0), 0);
2137 }
2138 return 0;
2139}
2140
2141static void
2142insert_branch_hints (void)
2143{
2144 struct spu_bb_info *spu_bb_info;
2145 rtx branch, insn, next;
2146 rtx branch_target = 0;
2147 int branch_addr = 0, insn_addr, head_addr;
2148 basic_block bb;
2149 unsigned int j;
2150
2151 spu_bb_info =
2152 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2153 sizeof (struct spu_bb_info));
2154
2155 /* We need exact insn addresses and lengths. */
2156 shorten_branches (get_insns ());
2157
2158 FOR_EACH_BB_REVERSE (bb)
2159 {
2160 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2161 branch = 0;
2162 if (spu_bb_info[bb->index].prop_jump)
2163 {
2164 branch = spu_bb_info[bb->index].prop_jump;
2165 branch_target = get_branch_target (branch);
2166 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2167 }
2168 /* Search from end of a block to beginning. In this loop, find
   2169	 jumps which need a branch hint and emit the hint only when:
2170 - it's an indirect branch and we're at the insn which sets
2171 the register
2172 - we're at an insn that will invalidate the hint. e.g., a
2173 call, another hint insn, inline asm that clobbers $hbr, and
2174 some inlined operations (divmodsi4). Don't consider jumps
2175 because they are only at the end of a block and are
2176 considered when we are deciding whether to propagate
2177 - we're getting too far away from the branch. The hbr insns
5b865faf 2178 only have a signed 10-bit offset
644459d0 2179 We go back as far as possible so the branch will be considered
2180 for propagation when we get to the beginning of the block. */
2181 next = 0;
2182 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2183 {
2184 if (INSN_P (insn))
2185 {
2186 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2187 if (branch && next
2188 && ((GET_CODE (branch_target) == REG
2189 && set_of (branch_target, insn) != NULL_RTX)
2190 || insn_clobbers_hbr (insn)
2191 || branch_addr - insn_addr > 600))
2192 {
2193 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2194 if (insn != BB_END (bb)
2195 && branch_addr - next_addr >= spu_hint_dist)
2196 {
2197 if (dump_file)
2198 fprintf (dump_file,
2199 "hint for %i in block %i before %i\n",
2200 INSN_UID (branch), bb->index, INSN_UID (next));
2201 spu_emit_branch_hint (next, branch, branch_target,
2202 branch_addr - next_addr);
2203 }
2204 branch = 0;
2205 }
2206
2207 /* JUMP_P will only be true at the end of a block. When
2208 branch is already set it means we've previously decided
2209 to propagate a hint for that branch into this block. */
2210 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2211 {
2212 branch = 0;
2213 if ((branch_target = get_branch_target (insn)))
2214 {
2215 branch = insn;
2216 branch_addr = insn_addr;
2217 }
2218 }
2219
2220 /* When a branch hint is emitted it will be inserted
2221 before "next". Make sure next is the beginning of a
2222 cycle to minimize impact on the scheduled insns. */
2223 if (GET_MODE (insn) == TImode)
2224 next = insn;
2225 }
2226 if (insn == BB_HEAD (bb))
2227 break;
2228 }
2229
2230 if (branch)
2231 {
2232 /* If we haven't emitted a hint for this branch yet, it might
2233 be profitable to emit it in one of the predecessor blocks,
2234 especially for loops. */
2235 rtx bbend;
2236 basic_block prev = 0, prop = 0, prev2 = 0;
2237 int loop_exit = 0, simple_loop = 0;
2238 int next_addr = 0;
2239 if (next)
2240 next_addr = INSN_ADDRESSES (INSN_UID (next));
2241
2242 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2243 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2244 prev = EDGE_PRED (bb, j)->src;
2245 else
2246 prev2 = EDGE_PRED (bb, j)->src;
2247
2248 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2249 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2250 loop_exit = 1;
2251 else if (EDGE_SUCC (bb, j)->dest == bb)
2252 simple_loop = 1;
2253
2254 /* If this branch is a loop exit then propagate to previous
2255 fallthru block. This catches the cases when it is a simple
2256 loop or when there is an initial branch into the loop. */
2257 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2258 prop = prev;
2259
   2260	  /* If there is only one adjacent predecessor, don't propagate
   2261	     outside this loop.  This loop_depth test isn't perfect, but
2262 I'm not sure the loop_father member is valid at this point. */
2263 else if (prev && single_pred_p (bb)
2264 && prev->loop_depth == bb->loop_depth)
2265 prop = prev;
2266
2267 /* If this is the JOIN block of a simple IF-THEN then
80777cd8 2268 propagate the hint to the HEADER block. */
644459d0 2269 else if (prev && prev2
2270 && EDGE_COUNT (bb->preds) == 2
2271 && EDGE_COUNT (prev->preds) == 1
2272 && EDGE_PRED (prev, 0)->src == prev2
2273 && prev2->loop_depth == bb->loop_depth
2274 && GET_CODE (branch_target) != REG)
2275 prop = prev;
2276
2277 /* Don't propagate when:
2278 - this is a simple loop and the hint would be too far
2279 - this is not a simple loop and there are 16 insns in
2280 this block already
2281 - the predecessor block ends in a branch that will be
2282 hinted
2283 - the predecessor block ends in an insn that invalidates
2284 the hint */
2285 if (prop
2286 && prop->index >= 0
2287 && (bbend = BB_END (prop))
2288 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2289 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2290 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2291 {
2292 if (dump_file)
2293 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2294 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2295 bb->index, prop->index, bb->loop_depth,
2296 INSN_UID (branch), loop_exit, simple_loop,
2297 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2298
2299 spu_bb_info[prop->index].prop_jump = branch;
2300 spu_bb_info[prop->index].bb = bb;
2301 }
2302 else if (next && branch_addr - next_addr >= spu_hint_dist)
2303 {
2304 if (dump_file)
2305 fprintf (dump_file, "hint for %i in block %i before %i\n",
2306 INSN_UID (branch), bb->index, INSN_UID (next));
2307 spu_emit_branch_hint (next, branch, branch_target,
2308 branch_addr - next_addr);
2309 }
2310 branch = 0;
2311 }
2312 }
2313 free (spu_bb_info);
2314}
2315\f
2316/* Emit a nop for INSN such that the two will dual issue. This assumes
2317 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2318 We check for TImode to handle a MULTI1 insn which has dual issued its
2319 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2320 ADDR_VEC insns. */
2321static void
2322emit_nop_for_insn (rtx insn)
2323{
2324 int p;
2325 rtx new_insn;
2326 p = get_pipe (insn);
2327 if (p == 1 && GET_MODE (insn) == TImode)
2328 {
2329 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2330 PUT_MODE (new_insn, TImode);
2331 PUT_MODE (insn, VOIDmode);
2332 }
2333 else
2334 new_insn = emit_insn_after (gen_lnop (), insn);
2335}
2336
2337/* Insert nops in basic blocks to meet dual issue alignment
2338 requirements. */
2339static void
2340insert_nops (void)
2341{
2342 rtx insn, next_insn, prev_insn;
2343 int length;
2344 int addr;
2345
2346 /* This sets up INSN_ADDRESSES. */
2347 shorten_branches (get_insns ());
2348
2349 /* Keep track of length added by nops. */
2350 length = 0;
2351
2352 prev_insn = 0;
2353 for (insn = get_insns (); insn; insn = next_insn)
2354 {
2355 next_insn = next_active_insn (insn);
2356 addr = INSN_ADDRESSES (INSN_UID (insn));
2357 if (GET_MODE (insn) == TImode
2358 && next_insn
2359 && GET_MODE (next_insn) != TImode
2360 && ((addr + length) & 7) != 0)
2361 {
2362 /* prev_insn will always be set because the first insn is
2363 always 8-byte aligned. */
2364 emit_nop_for_insn (prev_insn);
2365 length += 4;
2366 }
2367 prev_insn = insn;
2368 }
2369}
2370
2371static void
2372spu_machine_dependent_reorg (void)
2373{
2374 if (optimize > 0)
2375 {
2376 if (TARGET_BRANCH_HINTS)
2377 insert_branch_hints ();
2378 insert_nops ();
2379 }
2380}
2381\f
2382
2383/* Insn scheduling routines, primarily for dual issue. */
2384static int
2385spu_sched_issue_rate (void)
2386{
2387 return 2;
2388}
2389
2390static int
2391spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2392 int verbose ATTRIBUTE_UNUSED, rtx insn,
2393 int can_issue_more)
2394{
2395 if (GET_CODE (PATTERN (insn)) != USE
2396 && GET_CODE (PATTERN (insn)) != CLOBBER
2397 && get_pipe (insn) != -2)
2398 can_issue_more--;
2399 return can_issue_more;
2400}
2401
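/* Return which pipeline INSN issues to: 0 for pipe0 insns, 1 for pipe1
   insns, -1 for inline asm and MULTI0 insns, and -2 for zero-cost
   conversion (TYPE_CONVERT) insns.  */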
2402static int
2403get_pipe (rtx insn)
2404{
2405 enum attr_type t;
2406 /* Handle inline asm */
2407 if (INSN_CODE (insn) == -1)
2408 return -1;
2409 t = get_attr_type (insn);
2410 switch (t)
2411 {
2412 case TYPE_CONVERT:
2413 return -2;
2414 case TYPE_MULTI0:
2415 return -1;
2416
2417 case TYPE_FX2:
2418 case TYPE_FX3:
2419 case TYPE_SPR:
2420 case TYPE_NOP:
2421 case TYPE_FXB:
2422 case TYPE_FPD:
2423 case TYPE_FP6:
2424 case TYPE_FP7:
2425 case TYPE_IPREFETCH:
2426 return 0;
2427
2428 case TYPE_LNOP:
2429 case TYPE_SHUF:
2430 case TYPE_LOAD:
2431 case TYPE_STORE:
2432 case TYPE_BR:
2433 case TYPE_MULTI1:
2434 case TYPE_HBR:
2435 return 1;
2436 default:
2437 abort ();
2438 }
2439}
2440
2441static int
2442spu_sched_adjust_priority (rtx insn, int pri)
2443{
2444 int p = get_pipe (insn);
   2445   /* Schedule UNSPEC_CONVERT insns early so they have less effect on
   2446      scheduling.  */
2447 if (GET_CODE (PATTERN (insn)) == USE
2448 || GET_CODE (PATTERN (insn)) == CLOBBER
2449 || p == -2)
2450 return pri + 100;
2451 /* Schedule pipe0 insns early for greedier dual issue. */
2452 if (p != 1)
2453 return pri + 50;
2454 return pri;
2455}
2456
2457/* INSN is dependent on DEP_INSN. */
2458static int
2459spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2460 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2461{
2462 if (GET_CODE (insn) == CALL_INSN)
2463 return cost - 2;
2464 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2465 scheduler makes every insn in a block anti-dependent on the final
2466 jump_insn. We adjust here so higher cost insns will get scheduled
2467 earlier. */
2468 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 2469 return insn_cost (dep_insn) - 3;
644459d0 2470 return cost;
2471}
2472\f
2473/* Create a CONST_DOUBLE from a string. */
2474struct rtx_def *
2475spu_float_const (const char *string, enum machine_mode mode)
2476{
2477 REAL_VALUE_TYPE value;
2478 value = REAL_VALUE_ATOF (string, mode);
2479 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2480}
2481
644459d0 2482int
2483spu_constant_address_p (rtx x)
2484{
2485 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2486 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2487 || GET_CODE (x) == HIGH);
2488}
2489
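/* Return the il-class instruction (il, ila, ilh, ilhu) that can load
   VAL, already truncated to SImode, in a single instruction, or
   SPU_NONE if no single such instruction exists.  */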
2490static enum spu_immediate
2491which_immediate_load (HOST_WIDE_INT val)
2492{
2493 gcc_assert (val == trunc_int_for_mode (val, SImode));
2494
2495 if (val >= -0x8000 && val <= 0x7fff)
2496 return SPU_IL;
2497 if (val >= 0 && val <= 0x3ffff)
2498 return SPU_ILA;
2499 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2500 return SPU_ILH;
2501 if ((val & 0xffff) == 0)
2502 return SPU_ILHU;
2503
2504 return SPU_NONE;
2505}
2506
dea01258 2507/* Return true when OP can be loaded by one of the il instructions, or
2508 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 2509int
2510immediate_load_p (rtx op, enum machine_mode mode)
dea01258 2511{
2512 if (CONSTANT_P (op))
2513 {
2514 enum immediate_class c = classify_immediate (op, mode);
5df189be 2515 return c == IC_IL1 || c == IC_IL1s
3072d30e 2516 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 2517 }
2518 return 0;
2519}
2520
2521/* Return true if the first SIZE bytes of ARR form a constant that can be
   2522   generated with cbd, chd, cwd or cdd.  When non-NULL, *PRUN and *PSTART
   2523   are set to the run length and offset to use for the instruction.  */
2524static int
2525cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2526{
2527 int cpat, run, i, start;
2528 cpat = 1;
2529 run = 0;
2530 start = -1;
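  /* A byte equal to i+16 matches what cbd/chd/cwd/cdd produce outside
     the insertion slot; any byte that differs must belong to a single
     run of 1, 2, 4 or 8 insertion-control bytes starting at an offset
     aligned to the run length.  */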
2531 for (i = 0; i < size && cpat; i++)
2532 if (arr[i] != i+16)
2533 {
2534 if (!run)
2535 {
2536 start = i;
2537 if (arr[i] == 3)
2538 run = 1;
2539 else if (arr[i] == 2 && arr[i+1] == 3)
2540 run = 2;
2541 else if (arr[i] == 0)
2542 {
2543 while (arr[i+run] == run && i+run < 16)
2544 run++;
2545 if (run != 4 && run != 8)
2546 cpat = 0;
2547 }
2548 else
2549 cpat = 0;
2550 if ((i & (run-1)) != 0)
2551 cpat = 0;
2552 i += run;
2553 }
2554 else
2555 cpat = 0;
2556 }
b01a6dc3 2557 if (cpat && (run || size < 16))
dea01258 2558 {
2559 if (run == 0)
2560 run = 1;
2561 if (prun)
2562 *prun = run;
2563 if (pstart)
2564 *pstart = start == -1 ? 16-run : start;
2565 return 1;
2566 }
2567 return 0;
2568}
2569
2570/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 2571 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 2572static enum immediate_class
2573classify_immediate (rtx op, enum machine_mode mode)
644459d0 2574{
2575 HOST_WIDE_INT val;
2576 unsigned char arr[16];
5df189be 2577 int i, j, repeated, fsmbi, repeat;
dea01258 2578
2579 gcc_assert (CONSTANT_P (op));
2580
644459d0 2581 if (GET_MODE (op) != VOIDmode)
2582 mode = GET_MODE (op);
2583
dea01258 2584 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 2585 if (!flag_pic
2586 && mode == V4SImode
dea01258 2587 && GET_CODE (op) == CONST_VECTOR
2588 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2589 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2590 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2591 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2592 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2593 op = CONST_VECTOR_ELT (op, 0);
644459d0 2594
dea01258 2595 switch (GET_CODE (op))
2596 {
2597 case SYMBOL_REF:
2598 case LABEL_REF:
2599 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 2600
dea01258 2601 case CONST:
0cfc65d4 2602 /* We can never know if the resulting address fits in 18 bits and can be
2603 loaded with ila. For now, assume the address will not overflow if
2604 the displacement is "small" (fits 'K' constraint). */
2605 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2606 {
2607 rtx sym = XEXP (XEXP (op, 0), 0);
2608 rtx cst = XEXP (XEXP (op, 0), 1);
2609
2610 if (GET_CODE (sym) == SYMBOL_REF
2611 && GET_CODE (cst) == CONST_INT
2612 && satisfies_constraint_K (cst))
2613 return IC_IL1s;
2614 }
2615 return IC_IL2s;
644459d0 2616
dea01258 2617 case HIGH:
2618 return IC_IL1s;
2619
2620 case CONST_VECTOR:
2621 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2622 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2623 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2624 return IC_POOL;
2625 /* Fall through. */
2626
2627 case CONST_INT:
2628 case CONST_DOUBLE:
2629 constant_to_array (mode, op, arr);
644459d0 2630
dea01258 2631 /* Check that each 4-byte slot is identical. */
2632 repeated = 1;
2633 for (i = 4; i < 16; i += 4)
2634 for (j = 0; j < 4; j++)
2635 if (arr[j] != arr[i + j])
2636 repeated = 0;
2637
2638 if (repeated)
2639 {
2640 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2641 val = trunc_int_for_mode (val, SImode);
2642
2643 if (which_immediate_load (val) != SPU_NONE)
2644 return IC_IL1;
2645 }
2646
2647 /* Any mode of 2 bytes or smaller can be loaded with an il
2648 instruction. */
2649 gcc_assert (GET_MODE_SIZE (mode) > 2);
2650
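      /* The constant is an fsmbi candidate when every nonzero byte has
         the same value: 0xff bytes come straight from fsmbi (IC_FSMBI),
         any other single value needs the longer IC_FSMBI2 sequence.  */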
2651 fsmbi = 1;
5df189be 2652 repeat = 0;
dea01258 2653 for (i = 0; i < 16 && fsmbi; i++)
5df189be 2654 if (arr[i] != 0 && repeat == 0)
2655 repeat = arr[i];
2656 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 2657 fsmbi = 0;
2658 if (fsmbi)
5df189be 2659 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 2660
2661 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2662 return IC_CPAT;
2663
2664 if (repeated)
2665 return IC_IL2;
2666
2667 return IC_POOL;
2668 default:
2669 break;
2670 }
2671 gcc_unreachable ();
644459d0 2672}
2673
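/* Return the or-immediate instruction (ori, orhi, orbi or iohl) whose
   immediate field can produce VAL, or SPU_NONE.  */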
2674static enum spu_immediate
2675which_logical_immediate (HOST_WIDE_INT val)
2676{
2677 gcc_assert (val == trunc_int_for_mode (val, SImode));
2678
2679 if (val >= -0x200 && val <= 0x1ff)
2680 return SPU_ORI;
2681 if (val >= 0 && val <= 0xffff)
2682 return SPU_IOHL;
2683 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2684 {
2685 val = trunc_int_for_mode (val, HImode);
2686 if (val >= -0x200 && val <= 0x1ff)
2687 return SPU_ORHI;
2688 if ((val & 0xff) == ((val >> 8) & 0xff))
2689 {
2690 val = trunc_int_for_mode (val, QImode);
2691 if (val >= -0x200 && val <= 0x1ff)
2692 return SPU_ORBI;
2693 }
2694 }
2695 return SPU_NONE;
2696}
2697
5df189be 2698/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2699 CONST_DOUBLEs. */
2700static int
2701const_vector_immediate_p (rtx x)
2702{
2703 int i;
2704 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2705 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2706 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2707 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2708 return 0;
2709 return 1;
2710}
2711
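/* Return TRUE if OP is a constant whose value, which must repeat in
   every 32-bit slot, can be used directly as the immediate of a logical
   instruction (excluding iohl).  */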
644459d0 2712int
2713logical_immediate_p (rtx op, enum machine_mode mode)
2714{
2715 HOST_WIDE_INT val;
2716 unsigned char arr[16];
2717 int i, j;
2718
2719 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2720 || GET_CODE (op) == CONST_VECTOR);
2721
5df189be 2722 if (GET_CODE (op) == CONST_VECTOR
2723 && !const_vector_immediate_p (op))
2724 return 0;
2725
644459d0 2726 if (GET_MODE (op) != VOIDmode)
2727 mode = GET_MODE (op);
2728
2729 constant_to_array (mode, op, arr);
2730
2731 /* Check that bytes are repeated. */
2732 for (i = 4; i < 16; i += 4)
2733 for (j = 0; j < 4; j++)
2734 if (arr[j] != arr[i + j])
2735 return 0;
2736
2737 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2738 val = trunc_int_for_mode (val, SImode);
2739
2740 i = which_logical_immediate (val);
2741 return i != SPU_NONE && i != SPU_IOHL;
2742}
2743
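/* Return TRUE if OP is a constant whose repeating 32-bit value fits in
   the unsigned 16-bit immediate field of iohl.  */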
2744int
2745iohl_immediate_p (rtx op, enum machine_mode mode)
2746{
2747 HOST_WIDE_INT val;
2748 unsigned char arr[16];
2749 int i, j;
2750
2751 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2752 || GET_CODE (op) == CONST_VECTOR);
2753
5df189be 2754 if (GET_CODE (op) == CONST_VECTOR
2755 && !const_vector_immediate_p (op))
2756 return 0;
2757
644459d0 2758 if (GET_MODE (op) != VOIDmode)
2759 mode = GET_MODE (op);
2760
2761 constant_to_array (mode, op, arr);
2762
2763 /* Check that bytes are repeated. */
2764 for (i = 4; i < 16; i += 4)
2765 for (j = 0; j < 4; j++)
2766 if (arr[j] != arr[i + j])
2767 return 0;
2768
2769 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2770 val = trunc_int_for_mode (val, SImode);
2771
2772 return val >= 0 && val <= 0xffff;
2773}
2774
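/* Return TRUE if OP is a constant whose element value, repeated across
   the register and truncated to the element mode, lies in the range
   [LOW, HIGH].  */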
2775int
2776arith_immediate_p (rtx op, enum machine_mode mode,
2777 HOST_WIDE_INT low, HOST_WIDE_INT high)
2778{
2779 HOST_WIDE_INT val;
2780 unsigned char arr[16];
2781 int bytes, i, j;
2782
2783 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2784 || GET_CODE (op) == CONST_VECTOR);
2785
5df189be 2786 if (GET_CODE (op) == CONST_VECTOR
2787 && !const_vector_immediate_p (op))
2788 return 0;
2789
644459d0 2790 if (GET_MODE (op) != VOIDmode)
2791 mode = GET_MODE (op);
2792
2793 constant_to_array (mode, op, arr);
2794
2795 if (VECTOR_MODE_P (mode))
2796 mode = GET_MODE_INNER (mode);
2797
2798 bytes = GET_MODE_SIZE (mode);
2799 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2800
2801 /* Check that bytes are repeated. */
2802 for (i = bytes; i < 16; i += bytes)
2803 for (j = 0; j < bytes; j++)
2804 if (arr[j] != arr[i + j])
2805 return 0;
2806
2807 val = arr[0];
2808 for (j = 1; j < bytes; j++)
2809 val = (val << 8) | arr[j];
2810
2811 val = trunc_int_for_mode (val, mode);
2812
2813 return val >= low && val <= high;
2814}
2815
2816/* We accept:
5b865faf 2817 - any 32-bit constant (SImode, SFmode)
644459d0 2818 - any constant that can be generated with fsmbi (any mode)
5b865faf 2819 - a 64-bit constant where the high and low bits are identical
644459d0 2820 (DImode, DFmode)
5b865faf 2821 - a 128-bit constant where the four 32-bit words match. */
644459d0 2822int
2823spu_legitimate_constant_p (rtx x)
2824{
5df189be 2825 if (GET_CODE (x) == HIGH)
2826 x = XEXP (x, 0);
644459d0 2827 /* V4SI with all identical symbols is valid. */
5df189be 2828 if (!flag_pic
2829 && GET_MODE (x) == V4SImode
644459d0 2830 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2831 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 2832 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 2833 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2834 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2835 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2836
5df189be 2837 if (GET_CODE (x) == CONST_VECTOR
2838 && !const_vector_immediate_p (x))
2839 return 0;
644459d0 2840 return 1;
2841}
2842
2843/* Valid addresses are:
2844 - symbol_ref, label_ref, const
2845 - reg
2846 - reg + const, where either reg or const is 16 byte aligned
2847 - reg + reg, alignment doesn't matter
2848 The alignment matters in the reg+const case because lqd and stqd
2849 ignore the 4 least significant bits of the const. (TODO: It might be
2850 preferable to allow any alignment and fix it up when splitting.) */
2851int
2852spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2853 rtx x, int reg_ok_strict)
2854{
2855 if (mode == TImode && GET_CODE (x) == AND
2856 && GET_CODE (XEXP (x, 1)) == CONST_INT
2857 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2858 x = XEXP (x, 0);
2859 switch (GET_CODE (x))
2860 {
2861 case SYMBOL_REF:
2862 case LABEL_REF:
2863 return !TARGET_LARGE_MEM;
2864
2865 case CONST:
0cfc65d4 2866 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2867 {
2868 rtx sym = XEXP (XEXP (x, 0), 0);
2869 rtx cst = XEXP (XEXP (x, 0), 1);
2870
2871 /* Accept any symbol_ref + constant, assuming it does not
2872 wrap around the local store addressability limit. */
2873 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2874 return 1;
2875 }
2876 return 0;
644459d0 2877
2878 case CONST_INT:
2879 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2880
2881 case SUBREG:
2882 x = XEXP (x, 0);
2883 gcc_assert (GET_CODE (x) == REG);
2884
2885 case REG:
2886 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2887
2888 case PLUS:
2889 case LO_SUM:
2890 {
2891 rtx op0 = XEXP (x, 0);
2892 rtx op1 = XEXP (x, 1);
2893 if (GET_CODE (op0) == SUBREG)
2894 op0 = XEXP (op0, 0);
2895 if (GET_CODE (op1) == SUBREG)
2896 op1 = XEXP (op1, 0);
2897 /* We can't just accept any aligned register because CSE can
2898 change it to a register that is not marked aligned and then
2899 recog will fail. So we only accept frame registers because
2900 they will only be changed to other frame registers. */
2901 if (GET_CODE (op0) == REG
2902 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2903 && GET_CODE (op1) == CONST_INT
2904 && INTVAL (op1) >= -0x2000
2905 && INTVAL (op1) <= 0x1fff
5df189be 2906 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 2907 return 1;
2908 if (GET_CODE (op0) == REG
2909 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2910 && GET_CODE (op1) == REG
2911 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2912 return 1;
2913 }
2914 break;
2915
2916 default:
2917 break;
2918 }
2919 return 0;
2920}
2921
2922/* When the address is reg + const_int, force the const_int into a
fa7637bd 2923 register. */
644459d0 2924rtx
2925spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2926 enum machine_mode mode)
2927{
2928 rtx op0, op1;
2929 /* Make sure both operands are registers. */
2930 if (GET_CODE (x) == PLUS)
2931 {
2932 op0 = XEXP (x, 0);
2933 op1 = XEXP (x, 1);
2934 if (ALIGNED_SYMBOL_REF_P (op0))
2935 {
2936 op0 = force_reg (Pmode, op0);
2937 mark_reg_pointer (op0, 128);
2938 }
2939 else if (GET_CODE (op0) != REG)
2940 op0 = force_reg (Pmode, op0);
2941 if (ALIGNED_SYMBOL_REF_P (op1))
2942 {
2943 op1 = force_reg (Pmode, op1);
2944 mark_reg_pointer (op1, 128);
2945 }
2946 else if (GET_CODE (op1) != REG)
2947 op1 = force_reg (Pmode, op1);
2948 x = gen_rtx_PLUS (Pmode, op0, op1);
2949 if (spu_legitimate_address (mode, x, 0))
2950 return x;
2951 }
2952 return NULL_RTX;
2953}
2954
2955/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2956 struct attribute_spec.handler. */
2957static tree
2958spu_handle_fndecl_attribute (tree * node,
2959 tree name,
2960 tree args ATTRIBUTE_UNUSED,
2961 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2962{
2963 if (TREE_CODE (*node) != FUNCTION_DECL)
2964 {
2965 warning (0, "`%s' attribute only applies to functions",
2966 IDENTIFIER_POINTER (name));
2967 *no_add_attrs = true;
2968 }
2969
2970 return NULL_TREE;
2971}
2972
2973/* Handle the "vector" attribute. */
2974static tree
2975spu_handle_vector_attribute (tree * node, tree name,
2976 tree args ATTRIBUTE_UNUSED,
2977 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2978{
2979 tree type = *node, result = NULL_TREE;
2980 enum machine_mode mode;
2981 int unsigned_p;
2982
2983 while (POINTER_TYPE_P (type)
2984 || TREE_CODE (type) == FUNCTION_TYPE
2985 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2986 type = TREE_TYPE (type);
2987
2988 mode = TYPE_MODE (type);
2989
2990 unsigned_p = TYPE_UNSIGNED (type);
2991 switch (mode)
2992 {
2993 case DImode:
2994 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2995 break;
2996 case SImode:
2997 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2998 break;
2999 case HImode:
3000 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3001 break;
3002 case QImode:
3003 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3004 break;
3005 case SFmode:
3006 result = V4SF_type_node;
3007 break;
3008 case DFmode:
3009 result = V2DF_type_node;
3010 break;
3011 default:
3012 break;
3013 }
3014
3015 /* Propagate qualifiers attached to the element type
3016 onto the vector type. */
3017 if (result && result != type && TYPE_QUALS (type))
3018 result = build_qualified_type (result, TYPE_QUALS (type));
3019
3020 *no_add_attrs = true; /* No need to hang on to the attribute. */
3021
3022 if (!result)
3023 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3024 else
d991e6e8 3025 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3026
3027 return NULL_TREE;
3028}
3029
f2b32076 3030/* Return nonzero if FUNC is a naked function. */
644459d0 3031static int
3032spu_naked_function_p (tree func)
3033{
3034 tree a;
3035
3036 if (TREE_CODE (func) != FUNCTION_DECL)
3037 abort ();
3038
3039 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3040 return a != NULL_TREE;
3041}
3042
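/* Return the offset between the eliminable register FROM and its
   replacement register TO, given the current frame layout.  */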
3043int
3044spu_initial_elimination_offset (int from, int to)
3045{
3046 int saved_regs_size = spu_saved_regs_size ();
3047 int sp_offset = 0;
abe32cce 3048 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3049 || get_frame_size () || saved_regs_size)
3050 sp_offset = STACK_POINTER_OFFSET;
3051 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3052 return (sp_offset + crtl->outgoing_args_size);
644459d0 3053 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3054 return 0;
3055 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3056 return sp_offset + crtl->outgoing_args_size
644459d0 3057 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3058 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3059 return get_frame_size () + saved_regs_size + sp_offset;
3060 return 0;
3061}
3062
3063rtx
fb80456a 3064spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3065{
3066 enum machine_mode mode = TYPE_MODE (type);
3067 int byte_size = ((mode == BLKmode)
3068 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3069
3070 /* Make sure small structs are left justified in a register. */
3071 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3072 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3073 {
3074 enum machine_mode smode;
3075 rtvec v;
3076 int i;
3077 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3078 int n = byte_size / UNITS_PER_WORD;
3079 v = rtvec_alloc (nregs);
3080 for (i = 0; i < n; i++)
3081 {
3082 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3083 gen_rtx_REG (TImode,
3084 FIRST_RETURN_REGNUM
3085 + i),
3086 GEN_INT (UNITS_PER_WORD * i));
3087 byte_size -= UNITS_PER_WORD;
3088 }
3089
3090 if (n < nregs)
3091 {
3092 if (byte_size < 4)
3093 byte_size = 4;
3094 smode =
3095 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3096 RTVEC_ELT (v, n) =
3097 gen_rtx_EXPR_LIST (VOIDmode,
3098 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3099 GEN_INT (UNITS_PER_WORD * n));
3100 }
3101 return gen_rtx_PARALLEL (mode, v);
3102 }
3103 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3104}
3105
3106rtx
3107spu_function_arg (CUMULATIVE_ARGS cum,
3108 enum machine_mode mode,
3109 tree type, int named ATTRIBUTE_UNUSED)
3110{
3111 int byte_size;
3112
3113 if (cum >= MAX_REGISTER_ARGS)
3114 return 0;
3115
3116 byte_size = ((mode == BLKmode)
3117 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3118
   3119	  /* The ABI does not allow parameters to be passed partly in
   3120	     registers and partly on the stack.  */
3121 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3122 return 0;
3123
3124 /* Make sure small structs are left justified in a register. */
3125 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3126 && byte_size < UNITS_PER_WORD && byte_size > 0)
3127 {
3128 enum machine_mode smode;
3129 rtx gr_reg;
3130 if (byte_size < 4)
3131 byte_size = 4;
3132 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3133 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3134 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3135 const0_rtx);
3136 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3137 }
3138 else
3139 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3140}
3141
3142/* Variable sized types are passed by reference. */
3143static bool
3144spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3145 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3146 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3147{
3148 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3149}
3150\f
3151
3152/* Var args. */
3153
3154/* Create and return the va_list datatype.
3155
3156 On SPU, va_list is an array type equivalent to
3157
3158 typedef struct __va_list_tag
3159 {
3160 void *__args __attribute__((__aligned(16)));
3161 void *__skip __attribute__((__aligned(16)));
3162
3163 } va_list[1];
3164
fa7637bd 3165 where __args points to the arg that will be returned by the next
644459d0 3166 va_arg(), and __skip points to the previous stack frame such that
3167 when __args == __skip we should advance __args by 32 bytes. */
3168static tree
3169spu_build_builtin_va_list (void)
3170{
3171 tree f_args, f_skip, record, type_decl;
3172 bool owp;
3173
3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3175
3176 type_decl =
3177 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3178
3179 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3180 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3181
3182 DECL_FIELD_CONTEXT (f_args) = record;
3183 DECL_ALIGN (f_args) = 128;
3184 DECL_USER_ALIGN (f_args) = 1;
3185
3186 DECL_FIELD_CONTEXT (f_skip) = record;
3187 DECL_ALIGN (f_skip) = 128;
3188 DECL_USER_ALIGN (f_skip) = 1;
3189
3190 TREE_CHAIN (record) = type_decl;
3191 TYPE_NAME (record) = type_decl;
3192 TYPE_FIELDS (record) = f_args;
3193 TREE_CHAIN (f_args) = f_skip;
3194
   3195	  /* We know this is being padded and we want it to be.  It is an internal
3196 type so hide the warnings from the user. */
3197 owp = warn_padded;
3198 warn_padded = false;
3199
3200 layout_type (record);
3201
3202 warn_padded = owp;
3203
3204 /* The correct type is an array type of one element. */
3205 return build_array_type (record, build_index_type (size_zero_node));
3206}
3207
3208/* Implement va_start by filling the va_list structure VALIST.
3209 NEXTARG points to the first anonymous stack argument.
3210
3211 The following global variables are used to initialize
3212 the va_list structure:
3213
abe32cce 3214 crtl->args.info;
644459d0 3215 the CUMULATIVE_ARGS for this function
3216
abe32cce 3217 crtl->args.arg_offset_rtx:
644459d0 3218 holds the offset of the first anonymous stack argument
3219 (relative to the virtual arg pointer). */
3220
8a58ed0a 3221static void
644459d0 3222spu_va_start (tree valist, rtx nextarg)
3223{
3224 tree f_args, f_skip;
3225 tree args, skip, t;
3226
3227 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3228 f_skip = TREE_CHAIN (f_args);
3229
3230 valist = build_va_arg_indirect_ref (valist);
3231 args =
3232 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3233 skip =
3234 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3235
3236 /* Find the __args area. */
3237 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3238 if (crtl->args.pretend_args_size > 0)
0de36bdb 3239 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3240 size_int (-STACK_POINTER_OFFSET));
35cc02b5 3241 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
644459d0 3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3244
3245 /* Find the __skip area. */
3246 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3247 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3248 size_int (crtl->args.pretend_args_size
0de36bdb 3249 - STACK_POINTER_OFFSET));
35cc02b5 3250 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
644459d0 3251 TREE_SIDE_EFFECTS (t) = 1;
3252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253}
3254
3255/* Gimplify va_arg by updating the va_list structure
3256 VALIST as required to retrieve an argument of type
3257 TYPE, and returning that argument.
3258
3259 ret = va_arg(VALIST, TYPE);
3260
3261 generates code equivalent to:
3262
3263 paddedsize = (sizeof(TYPE) + 15) & -16;
3264 if (VALIST.__args + paddedsize > VALIST.__skip
3265 && VALIST.__args <= VALIST.__skip)
3266 addr = VALIST.__skip + 32;
3267 else
3268 addr = VALIST.__args;
3269 VALIST.__args = addr + paddedsize;
3270 ret = *(TYPE *)addr;
3271 */
3272static tree
3273spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3274 tree * post_p ATTRIBUTE_UNUSED)
3275{
3276 tree f_args, f_skip;
3277 tree args, skip;
3278 HOST_WIDE_INT size, rsize;
3279 tree paddedsize, addr, tmp;
3280 bool pass_by_reference_p;
3281
3282 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3283 f_skip = TREE_CHAIN (f_args);
3284
3285 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3286 args =
3287 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3288 skip =
3289 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3290
3291 addr = create_tmp_var (ptr_type_node, "va_arg");
3292 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3293
   3294	  /* If an object is dynamically sized, a pointer to it is passed
3295 instead of the object itself. */
3296 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3297 false);
3298 if (pass_by_reference_p)
3299 type = build_pointer_type (type);
3300 size = int_size_in_bytes (type);
3301 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3302
3303 /* build conditional expression to calculate addr. The expression
3304 will be gimplified later. */
0de36bdb 3305 paddedsize = size_int (rsize);
3306 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
644459d0 3307 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3308 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3309 build2 (LE_EXPR, boolean_type_node, args, skip));
3310
3311 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
0de36bdb 3312 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3313 size_int (32)), args);
644459d0 3314
35cc02b5 3315 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
644459d0 3316 gimplify_and_add (tmp, pre_p);
3317
3318 /* update VALIST.__args */
0de36bdb 3319 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
35cc02b5 3320 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
644459d0 3321 gimplify_and_add (tmp, pre_p);
3322
3323 addr = fold_convert (build_pointer_type (type), addr);
3324
3325 if (pass_by_reference_p)
3326 addr = build_va_arg_indirect_ref (addr);
3327
3328 return build_va_arg_indirect_ref (addr);
3329}
3330
3331/* Save parameter registers starting with the register that corresponds
   3332   to the first unnamed parameter.  If the first unnamed parameter is
   3333   on the stack then save no registers.  Set pretend_args_size to the
3334 amount of space needed to save the registers. */
3335void
3336spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3337 tree type, int *pretend_size, int no_rtl)
3338{
3339 if (!no_rtl)
3340 {
3341 rtx tmp;
3342 int regno;
3343 int offset;
3344 int ncum = *cum;
3345
3346 /* cum currently points to the last named argument, we want to
3347 start at the next argument. */
3348 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3349
3350 offset = -STACK_POINTER_OFFSET;
3351 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3352 {
3353 tmp = gen_frame_mem (V4SImode,
3354 plus_constant (virtual_incoming_args_rtx,
3355 offset));
3356 emit_move_insn (tmp,
3357 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3358 offset += 16;
3359 }
3360 *pretend_size = offset + STACK_POINTER_OFFSET;
3361 }
3362}
3363\f
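/* With -fpic the PIC register must not be available to the register
   allocator.  */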
3364void
3365spu_conditional_register_usage (void)
3366{
3367 if (flag_pic)
3368 {
3369 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3370 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3371 }
644459d0 3372}
3373
3374/* This is called to decide when we can simplify a load instruction. We
3375 must only return true for registers which we know will always be
   3376   aligned, taking into account that CSE might replace this reg with
   3377   another one that has not been marked aligned.
   3378   So this is really only true for frame, stack and virtual registers,
fa7637bd 3379   which we know are always aligned and should not be adversely affected
   3380   by CSE.  */
644459d0 3381static int
3382regno_aligned_for_load (int regno)
3383{
3384 return regno == FRAME_POINTER_REGNUM
5df189be 3385 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 3386 || regno == ARG_POINTER_REGNUM
644459d0 3387 || regno == STACK_POINTER_REGNUM
5df189be 3388 || (regno >= FIRST_VIRTUAL_REGISTER
3389 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 3390}
3391
3392/* Return TRUE when mem is known to be 16-byte aligned. */
3393int
3394aligned_mem_p (rtx mem)
3395{
3396 if (MEM_ALIGN (mem) >= 128)
3397 return 1;
3398 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3399 return 1;
3400 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3401 {
3402 rtx p0 = XEXP (XEXP (mem, 0), 0);
3403 rtx p1 = XEXP (XEXP (mem, 0), 1);
3404 if (regno_aligned_for_load (REGNO (p0)))
3405 {
3406 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3407 return 1;
3408 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3409 return 1;
3410 }
3411 }
3412 else if (GET_CODE (XEXP (mem, 0)) == REG)
3413 {
3414 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3415 return 1;
3416 }
3417 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3418 return 1;
3419 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3420 {
3421 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3422 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3423 if (GET_CODE (p0) == SYMBOL_REF
3424 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3425 return 1;
3426 }
3427 return 0;
3428}
3429
69ced2d6 3430/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3431 into its SYMBOL_REF_FLAGS. */
3432static void
3433spu_encode_section_info (tree decl, rtx rtl, int first)
3434{
3435 default_encode_section_info (decl, rtl, first);
3436
3437 /* If a variable has a forced alignment to < 16 bytes, mark it with
3438 SYMBOL_FLAG_ALIGN1. */
3439 if (TREE_CODE (decl) == VAR_DECL
3440 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3441 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3442}
3443
644459d0 3444/* Return TRUE if we are certain the mem refers to a complete object
3445 which is both 16-byte aligned and padded to a 16-byte boundary. This
3446 would make it safe to store with a single instruction.
3447 We guarantee the alignment and padding for static objects by aligning
3448 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3449 FIXME: We currently cannot guarantee this for objects on the stack
3450 because assign_parm_setup_stack calls assign_stack_local with the
3451 alignment of the parameter mode and in that case the alignment never
3452 gets adjusted by LOCAL_ALIGNMENT. */
3453static int
3454store_with_one_insn_p (rtx mem)
3455{
3456 rtx addr = XEXP (mem, 0);
3457 if (GET_MODE (mem) == BLKmode)
3458 return 0;
3459 /* Only static objects. */
3460 if (GET_CODE (addr) == SYMBOL_REF)
3461 {
3462 /* We use the associated declaration to make sure the access is
fa7637bd 3463 referring to the whole object.
644459d0 3464	 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
3465 if it is necessary. Will there be cases where one exists, and
3466 the other does not? Will there be cases where both exist, but
3467 have different types? */
3468 tree decl = MEM_EXPR (mem);
3469 if (decl
3470 && TREE_CODE (decl) == VAR_DECL
3471 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3472 return 1;
3473 decl = SYMBOL_REF_DECL (addr);
3474 if (decl
3475 && TREE_CODE (decl) == VAR_DECL
3476 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3477 return 1;
3478 }
3479 return 0;
3480}
3481
3482int
3483spu_expand_mov (rtx * ops, enum machine_mode mode)
3484{
3485 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3486 abort ();
3487
3488 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3489 {
3490 rtx from = SUBREG_REG (ops[1]);
3491 enum machine_mode imode = GET_MODE (from);
3492
3493 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3494 && GET_MODE_CLASS (imode) == MODE_INT
3495 && subreg_lowpart_p (ops[1]));
3496
3497 if (GET_MODE_SIZE (imode) < 4)
3498 {
3499 from = gen_rtx_SUBREG (SImode, from, 0);
3500 imode = SImode;
3501 }
3502
3503 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3504 {
99bdde56 3505 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 3506 emit_insn (GEN_FCN (icode) (ops[0], from));
3507 }
3508 else
3509 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3510 return 1;
3511 }
3512
3513 /* At least one of the operands needs to be a register. */
3514 if ((reload_in_progress | reload_completed) == 0
3515 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3516 {
3517 rtx temp = force_reg (mode, ops[1]);
3518 emit_move_insn (ops[0], temp);
3519 return 1;
3520 }
3521 if (reload_in_progress || reload_completed)
3522 {
dea01258 3523 if (CONSTANT_P (ops[1]))
3524 return spu_split_immediate (ops);
644459d0 3525 return 0;
3526 }
3527 else
3528 {
3529 if (GET_CODE (ops[0]) == MEM)
3530 {
3531 if (!spu_valid_move (ops))
3532 {
3533 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3534 gen_reg_rtx (TImode)));
3535 return 1;
3536 }
3537 }
3538 else if (GET_CODE (ops[1]) == MEM)
3539 {
3540 if (!spu_valid_move (ops))
3541 {
3542 emit_insn (gen_load
3543 (ops[0], ops[1], gen_reg_rtx (TImode),
3544 gen_reg_rtx (SImode)));
3545 return 1;
3546 }
3547 }
3548 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3549 extend them. */
3550 if (GET_CODE (ops[1]) == CONST_INT)
3551 {
3552 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3553 if (val != INTVAL (ops[1]))
3554 {
3555 emit_move_insn (ops[0], GEN_INT (val));
3556 return 1;
3557 }
3558 }
3559 }
3560 return 0;
3561}
3562
644459d0 3563void
3564spu_split_load (rtx * ops)
3565{
3566 enum machine_mode mode = GET_MODE (ops[0]);
3567 rtx addr, load, rot, mem, p0, p1;
3568 int rot_amt;
3569
3570 addr = XEXP (ops[1], 0);
3571
3572 rot = 0;
3573 rot_amt = 0;
3574 if (GET_CODE (addr) == PLUS)
3575 {
3576 /* 8 cases:
3577 aligned reg + aligned reg => lqx
3578 aligned reg + unaligned reg => lqx, rotqby
3579 aligned reg + aligned const => lqd
3580 aligned reg + unaligned const => lqd, rotqbyi
3581 unaligned reg + aligned reg => lqx, rotqby
3582 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3583 unaligned reg + aligned const => lqd, rotqby
3584 unaligned reg + unaligned const -> not allowed by legitimate address
3585 */
3586 p0 = XEXP (addr, 0);
3587 p1 = XEXP (addr, 1);
aa71ecd4 3588 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 3589 {
aa71ecd4 3590 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3591 {
3592 emit_insn (gen_addsi3 (ops[3], p0, p1));
3593 rot = ops[3];
3594 }
3595 else
3596 rot = p0;
3597 }
3598 else
3599 {
3600 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3601 {
3602 rot_amt = INTVAL (p1) & 15;
3603 p1 = GEN_INT (INTVAL (p1) & -16);
3604 addr = gen_rtx_PLUS (SImode, p0, p1);
3605 }
aa71ecd4 3606 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3607 rot = p1;
3608 }
3609 }
3610 else if (GET_CODE (addr) == REG)
3611 {
aa71ecd4 3612 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 3613 rot = addr;
3614 }
3615 else if (GET_CODE (addr) == CONST)
3616 {
3617 if (GET_CODE (XEXP (addr, 0)) == PLUS
3618 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3619 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3620 {
3621 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3622 if (rot_amt & -16)
3623 addr = gen_rtx_CONST (Pmode,
3624 gen_rtx_PLUS (Pmode,
3625 XEXP (XEXP (addr, 0), 0),
3626 GEN_INT (rot_amt & -16)));
3627 else
3628 addr = XEXP (XEXP (addr, 0), 0);
3629 }
3630 else
3631 rot = addr;
3632 }
3633 else if (GET_CODE (addr) == CONST_INT)
3634 {
3635 rot_amt = INTVAL (addr);
3636 addr = GEN_INT (rot_amt & -16);
3637 }
3638 else if (!ALIGNED_SYMBOL_REF_P (addr))
3639 rot = addr;
3640
3641 if (GET_MODE_SIZE (mode) < 4)
3642 rot_amt += GET_MODE_SIZE (mode) - 4;
3643
3644 rot_amt &= 15;
3645
3646 if (rot && rot_amt)
3647 {
3648 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3649 rot = ops[3];
3650 rot_amt = 0;
3651 }
3652
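  /* Load the enclosing aligned quadword and rotate the requested bytes
     into the preferred slot.  */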
3653 load = ops[2];
3654
3655 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3656 mem = change_address (ops[1], TImode, addr);
3657
e04cf423 3658 emit_insn (gen_movti (load, mem));
644459d0 3659
3660 if (rot)
3661 emit_insn (gen_rotqby_ti (load, load, rot));
3662 else if (rot_amt)
3663 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3664
3665 if (reload_completed)
3666 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3667 else
3668 emit_insn (gen_spu_convert (ops[0], load));
3669}
3670
3671void
3672spu_split_store (rtx * ops)
3673{
3674 enum machine_mode mode = GET_MODE (ops[0]);
3675 rtx pat = ops[2];
3676 rtx reg = ops[3];
3677 rtx addr, p0, p1, p1_lo, smem;
3678 int aform;
3679 int scalar;
3680
3681 addr = XEXP (ops[0], 0);
3682
3683 if (GET_CODE (addr) == PLUS)
3684 {
3685 /* 8 cases:
3686 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3687 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3688 aligned reg + aligned const => lqd, c?d, shuf, stqx
3689 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3690 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3691 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3692 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3693 unaligned reg + unaligned const -> not allowed by legitimate address
3694 */
3695 aform = 0;
3696 p0 = XEXP (addr, 0);
3697 p1 = p1_lo = XEXP (addr, 1);
3698 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3699 {
3700 p1_lo = GEN_INT (INTVAL (p1) & 15);
3701 p1 = GEN_INT (INTVAL (p1) & -16);
3702 addr = gen_rtx_PLUS (SImode, p0, p1);
3703 }
3704 }
3705 else if (GET_CODE (addr) == REG)
3706 {
3707 aform = 0;
3708 p0 = addr;
3709 p1 = p1_lo = const0_rtx;
3710 }
3711 else
3712 {
3713 aform = 1;
3714 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3715 p1 = 0; /* aform doesn't use p1 */
3716 p1_lo = addr;
3717 if (ALIGNED_SYMBOL_REF_P (addr))
3718 p1_lo = const0_rtx;
3719 else if (GET_CODE (addr) == CONST)
3720 {
3721 if (GET_CODE (XEXP (addr, 0)) == PLUS
3722 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3723 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3724 {
3725 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3726 if ((v & -16) != 0)
3727 addr = gen_rtx_CONST (Pmode,
3728 gen_rtx_PLUS (Pmode,
3729 XEXP (XEXP (addr, 0), 0),
3730 GEN_INT (v & -16)));
3731 else
3732 addr = XEXP (XEXP (addr, 0), 0);
3733 p1_lo = GEN_INT (v & 15);
3734 }
3735 }
3736 else if (GET_CODE (addr) == CONST_INT)
3737 {
3738 p1_lo = GEN_INT (INTVAL (addr) & 15);
3739 addr = GEN_INT (INTVAL (addr) & -16);
3740 }
3741 }
3742
e04cf423 3743 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3744
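  /* Unless the value covers a complete, aligned 16-byte object, read the
     enclosing quadword and merge the new value into it before writing
     the whole quadword back.  */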
644459d0 3745 scalar = store_with_one_insn_p (ops[0]);
3746 if (!scalar)
3747 {
   3748      /* We could copy the flags from the ops[0] MEM to lmem here;
   3749         we don't because we want this load to be optimized away if
3750 possible, and copying the flags will prevent that in certain
3751 cases, e.g. consider the volatile flag. */
3752
e04cf423 3753 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3754 set_mem_alias_set (lmem, 0);
3755 emit_insn (gen_movti (reg, lmem));
644459d0 3756
aa71ecd4 3757 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 3758 p0 = stack_pointer_rtx;
3759 if (!p1_lo)
3760 p1_lo = const0_rtx;
3761
3762 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3763 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3764 }
3765 else if (reload_completed)
3766 {
3767 if (GET_CODE (ops[1]) == REG)
3768 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3769 else if (GET_CODE (ops[1]) == SUBREG)
3770 emit_move_insn (reg,
3771 gen_rtx_REG (GET_MODE (reg),
3772 REGNO (SUBREG_REG (ops[1]))));
3773 else
3774 abort ();
3775 }
3776 else
3777 {
3778 if (GET_CODE (ops[1]) == REG)
3779 emit_insn (gen_spu_convert (reg, ops[1]));
3780 else if (GET_CODE (ops[1]) == SUBREG)
3781 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3782 else
3783 abort ();
3784 }
3785
3786 if (GET_MODE_SIZE (mode) < 4 && scalar)
3787 emit_insn (gen_shlqby_ti
3788 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3789
644459d0 3790 smem = change_address (ops[0], TImode, addr);
3791 /* We can't use the previous alias set because the memory has changed
3792 size and can potentially overlap objects of other types. */
3793 set_mem_alias_set (smem, 0);
3794
e04cf423 3795 emit_insn (gen_movti (smem, reg));
644459d0 3796}
3797
 3798/* Return TRUE if X is a MEM which is a struct member reference
3799 and the member can safely be loaded and stored with a single
3800 instruction because it is padded. */
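/* A hypothetical example (not taken from this file): for

       struct s {
         int a __attribute__ ((aligned (16)));
         int b __attribute__ ((aligned (16)));
       };

   a reference to s.a qualifies, because the 16-byte alignment of the
   following field guarantees the bytes after 'a' are padding, so a
   full quadword store cannot clobber a neighboring object.  */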
3801static int
3802mem_is_padded_component_ref (rtx x)
3803{
3804 tree t = MEM_EXPR (x);
3805 tree r;
3806 if (!t || TREE_CODE (t) != COMPONENT_REF)
3807 return 0;
3808 t = TREE_OPERAND (t, 1);
3809 if (!t || TREE_CODE (t) != FIELD_DECL
3810 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3811 return 0;
3812 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3813 r = DECL_FIELD_CONTEXT (t);
3814 if (!r || TREE_CODE (r) != RECORD_TYPE)
3815 return 0;
3816 /* Make sure they are the same mode */
3817 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3818 return 0;
 3819 /* If there are no following fields, then the field alignment assures
fa7637bd 3820 that the structure is padded to that alignment, which means this
 3821 field is padded too. */
644459d0 3822 if (TREE_CHAIN (t) == 0)
3823 return 1;
3824 /* If the following field is also aligned then this field will be
3825 padded. */
3826 t = TREE_CHAIN (t);
3827 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3828 return 1;
3829 return 0;
3830}
3831
c7b91b14 3832/* Parse the -mfixed-range= option string. */
3833static void
3834fix_range (const char *const_str)
3835{
3836 int i, first, last;
3837 char *str, *dash, *comma;
3838
 3839 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3840 REG2 are either register names or register numbers. The effect
3841 of this option is to mark the registers in the range from REG1 to
3842 REG2 as ``fixed'' so they won't be used by the compiler. */
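  /* For example (a hypothetical invocation), -mfixed-range=80-127 marks
     registers 80 through 127 as fixed so they will not be allocated;
     several ranges may be given separated by commas, e.g.
     -mfixed-range=80-85,100-127.  */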
3843
3844 i = strlen (const_str);
3845 str = (char *) alloca (i + 1);
3846 memcpy (str, const_str, i + 1);
3847
3848 while (1)
3849 {
3850 dash = strchr (str, '-');
3851 if (!dash)
3852 {
3853 warning (0, "value of -mfixed-range must have form REG1-REG2");
3854 return;
3855 }
3856 *dash = '\0';
3857 comma = strchr (dash + 1, ',');
3858 if (comma)
3859 *comma = '\0';
3860
3861 first = decode_reg_name (str);
3862 if (first < 0)
3863 {
3864 warning (0, "unknown register name: %s", str);
3865 return;
3866 }
3867
3868 last = decode_reg_name (dash + 1);
3869 if (last < 0)
3870 {
3871 warning (0, "unknown register name: %s", dash + 1);
3872 return;
3873 }
3874
3875 *dash = '-';
3876
3877 if (first > last)
3878 {
3879 warning (0, "%s-%s is an empty range", str, dash + 1);
3880 return;
3881 }
3882
3883 for (i = first; i <= last; ++i)
3884 fixed_regs[i] = call_used_regs[i] = 1;
3885
3886 if (!comma)
3887 break;
3888
3889 *comma = ',';
3890 str = comma + 1;
3891 }
3892}
3893
644459d0 3894int
3895spu_valid_move (rtx * ops)
3896{
3897 enum machine_mode mode = GET_MODE (ops[0]);
3898 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3899 return 0;
3900
3901 /* init_expr_once tries to recog against load and store insns to set
3902 the direct_load[] and direct_store[] arrays. We always want to
3903 consider those loads and stores valid. init_expr_once is called in
3904 the context of a dummy function which does not have a decl. */
3905 if (cfun->decl == 0)
3906 return 1;
3907
 3908 /* Don't allow loads/stores which would require more than 1 insn.
3909 During and after reload we assume loads and stores only take 1
3910 insn. */
3911 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3912 {
3913 if (GET_CODE (ops[0]) == MEM
3914 && (GET_MODE_SIZE (mode) < 4
3915 || !(store_with_one_insn_p (ops[0])
3916 || mem_is_padded_component_ref (ops[0]))))
3917 return 0;
3918 if (GET_CODE (ops[1]) == MEM
3919 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3920 return 0;
3921 }
3922 return 1;
3923}
3924
3925/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3926 can be generated using the fsmbi instruction. */
3927int
3928fsmbi_const_p (rtx x)
3929{
dea01258 3930 if (CONSTANT_P (x))
3931 {
5df189be 3932 /* We can always choose TImode for CONST_INT because the high bits
dea01258 3933 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 3934 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 3935 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 3936 }
3937 return 0;
3938}
3939
3940/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3941 can be generated using the cbd, chd, cwd or cdd instruction. */
3942int
3943cpat_const_p (rtx x, enum machine_mode mode)
3944{
3945 if (CONSTANT_P (x))
3946 {
3947 enum immediate_class c = classify_immediate (x, mode);
3948 return c == IC_CPAT;
3949 }
3950 return 0;
3951}
644459d0 3952
dea01258 3953rtx
3954gen_cpat_const (rtx * ops)
3955{
3956 unsigned char dst[16];
3957 int i, offset, shift, isize;
3958 if (GET_CODE (ops[3]) != CONST_INT
3959 || GET_CODE (ops[2]) != CONST_INT
3960 || (GET_CODE (ops[1]) != CONST_INT
3961 && GET_CODE (ops[1]) != REG))
3962 return 0;
3963 if (GET_CODE (ops[1]) == REG
3964 && (!REG_POINTER (ops[1])
3965 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3966 return 0;
644459d0 3967
3968 for (i = 0; i < 16; i++)
dea01258 3969 dst[i] = i + 16;
3970 isize = INTVAL (ops[3]);
3971 if (isize == 1)
3972 shift = 3;
3973 else if (isize == 2)
3974 shift = 2;
3975 else
3976 shift = 0;
3977 offset = (INTVAL (ops[2]) +
3978 (GET_CODE (ops[1]) ==
3979 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3980 for (i = 0; i < isize; i++)
3981 dst[offset + i] = i + shift;
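  /* For example, a word-sized (4 byte) insert at offset 0 yields the
     selector bytes { 0x00,0x01,0x02,0x03, 0x14,0x15,...,0x1f }: bytes
     0x00-0x0f take the new value from the first shufb operand, while
     0x10-0x1f preserve the original quadword from the second.  */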
3982 return array_to_constant (TImode, dst);
644459d0 3983}
3984
3985/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
 3986 array. Use MODE for CONST_INTs. When the constant's mode is smaller
3987 than 16 bytes, the value is repeated across the rest of the array. */
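/* E.g. constant_to_array (SImode, GEN_INT (0x12345678), arr) fills arr
   with the bytes 12 34 56 78 repeated four times.  */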
3988void
3989constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3990{
3991 HOST_WIDE_INT val;
3992 int i, j, first;
3993
3994 memset (arr, 0, 16);
3995 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3996 if (GET_CODE (x) == CONST_INT
3997 || (GET_CODE (x) == CONST_DOUBLE
3998 && (mode == SFmode || mode == DFmode)))
3999 {
4000 gcc_assert (mode != VOIDmode && mode != BLKmode);
4001
4002 if (GET_CODE (x) == CONST_DOUBLE)
4003 val = const_double_to_hwint (x);
4004 else
4005 val = INTVAL (x);
4006 first = GET_MODE_SIZE (mode) - 1;
4007 for (i = first; i >= 0; i--)
4008 {
4009 arr[i] = val & 0xff;
4010 val >>= 8;
4011 }
4012 /* Splat the constant across the whole array. */
4013 for (j = 0, i = first + 1; i < 16; i++)
4014 {
4015 arr[i] = arr[j];
4016 j = (j == first) ? 0 : j + 1;
4017 }
4018 }
4019 else if (GET_CODE (x) == CONST_DOUBLE)
4020 {
4021 val = CONST_DOUBLE_LOW (x);
4022 for (i = 15; i >= 8; i--)
4023 {
4024 arr[i] = val & 0xff;
4025 val >>= 8;
4026 }
4027 val = CONST_DOUBLE_HIGH (x);
4028 for (i = 7; i >= 0; i--)
4029 {
4030 arr[i] = val & 0xff;
4031 val >>= 8;
4032 }
4033 }
4034 else if (GET_CODE (x) == CONST_VECTOR)
4035 {
4036 int units;
4037 rtx elt;
4038 mode = GET_MODE_INNER (mode);
4039 units = CONST_VECTOR_NUNITS (x);
4040 for (i = 0; i < units; i++)
4041 {
4042 elt = CONST_VECTOR_ELT (x, i);
4043 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4044 {
4045 if (GET_CODE (elt) == CONST_DOUBLE)
4046 val = const_double_to_hwint (elt);
4047 else
4048 val = INTVAL (elt);
4049 first = GET_MODE_SIZE (mode) - 1;
4050 if (first + i * GET_MODE_SIZE (mode) > 16)
4051 abort ();
4052 for (j = first; j >= 0; j--)
4053 {
4054 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4055 val >>= 8;
4056 }
4057 }
4058 }
4059 }
4060 else
4061 gcc_unreachable();
4062}
4063
4064/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4065 smaller than 16 bytes, use the bytes that would represent that value
4066 in a register, e.g., for QImode return the value of arr[3]. */
4067rtx
4068array_to_constant (enum machine_mode mode, unsigned char arr[16])
4069{
4070 enum machine_mode inner_mode;
4071 rtvec v;
4072 int units, size, i, j, k;
4073 HOST_WIDE_INT val;
4074
4075 if (GET_MODE_CLASS (mode) == MODE_INT
4076 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4077 {
4078 j = GET_MODE_SIZE (mode);
4079 i = j < 4 ? 4 - j : 0;
4080 for (val = 0; i < j; i++)
4081 val = (val << 8) | arr[i];
4082 val = trunc_int_for_mode (val, mode);
4083 return GEN_INT (val);
4084 }
4085
4086 if (mode == TImode)
4087 {
4088 HOST_WIDE_INT high;
4089 for (i = high = 0; i < 8; i++)
4090 high = (high << 8) | arr[i];
4091 for (i = 8, val = 0; i < 16; i++)
4092 val = (val << 8) | arr[i];
4093 return immed_double_const (val, high, TImode);
4094 }
4095 if (mode == SFmode)
4096 {
4097 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4098 val = trunc_int_for_mode (val, SImode);
171b6d22 4099 return hwint_to_const_double (SFmode, val);
644459d0 4100 }
4101 if (mode == DFmode)
4102 {
4103 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4104 val <<= 32;
4105 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4106 return hwint_to_const_double (DFmode, val);
644459d0 4107 }
4108
4109 if (!VECTOR_MODE_P (mode))
4110 abort ();
4111
4112 units = GET_MODE_NUNITS (mode);
4113 size = GET_MODE_UNIT_SIZE (mode);
4114 inner_mode = GET_MODE_INNER (mode);
4115 v = rtvec_alloc (units);
4116
4117 for (k = i = 0; i < units; ++i)
4118 {
4119 val = 0;
4120 for (j = 0; j < size; j++, k++)
4121 val = (val << 8) | arr[k];
4122
4123 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4124 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4125 else
4126 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4127 }
4128 if (k > 16)
4129 abort ();
4130
4131 return gen_rtx_CONST_VECTOR (mode, v);
4132}
4133
4134static void
4135reloc_diagnostic (rtx x)
4136{
4137 tree loc_decl, decl = 0;
4138 const char *msg;
4139 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4140 return;
4141
4142 if (GET_CODE (x) == SYMBOL_REF)
4143 decl = SYMBOL_REF_DECL (x);
4144 else if (GET_CODE (x) == CONST
4145 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4146 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4147
4148 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4149 if (decl && !DECL_P (decl))
4150 decl = 0;
4151
4152 /* We use last_assemble_variable_decl to get line information. It's
4153 not always going to be right and might not even be close, but will
4154 be right for the more common cases. */
5df189be 4155 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4156 loc_decl = decl;
4157 else
4158 loc_decl = last_assemble_variable_decl;
4159
4160 /* The decl could be a string constant. */
4161 if (decl && DECL_P (decl))
4162 msg = "%Jcreating run-time relocation for %qD";
4163 else
4164 msg = "creating run-time relocation";
4165
99369027 4166 if (TARGET_WARN_RELOC)
644459d0 4167 warning (0, msg, loc_decl, decl);
99369027 4168 else
4169 error (msg, loc_decl, decl);
644459d0 4170}
4171
4172/* Hook into assemble_integer so we can generate an error for run-time
4173 relocations. The SPU ABI disallows them. */
4174static bool
4175spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4176{
4177 /* By default run-time relocations aren't supported, but we allow them
 4178 in case users support them in their own run-time loader, and we
 4179 provide a warning for those users that don't. */
4180 if ((GET_CODE (x) == SYMBOL_REF)
4181 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4182 reloc_diagnostic (x);
4183
4184 return default_assemble_integer (x, size, aligned_p);
4185}
4186
4187static void
4188spu_asm_globalize_label (FILE * file, const char *name)
4189{
4190 fputs ("\t.global\t", file);
4191 assemble_name (file, name);
4192 fputs ("\n", file);
4193}
4194
4195static bool
4196spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4197{
4198 enum machine_mode mode = GET_MODE (x);
4199 int cost = COSTS_N_INSNS (2);
4200
4201 /* Folding to a CONST_VECTOR will use extra space but there might
4202 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4203 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4204 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4205 because this cost will only be compared against a single insn.
4206 if (code == CONST_VECTOR)
4207 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4208 */
4209
4210 /* Use defaults for float operations. Not accurate but good enough. */
4211 if (mode == DFmode)
4212 {
4213 *total = COSTS_N_INSNS (13);
4214 return true;
4215 }
4216 if (mode == SFmode)
4217 {
4218 *total = COSTS_N_INSNS (6);
4219 return true;
4220 }
4221 switch (code)
4222 {
4223 case CONST_INT:
4224 if (satisfies_constraint_K (x))
4225 *total = 0;
4226 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4227 *total = COSTS_N_INSNS (1);
4228 else
4229 *total = COSTS_N_INSNS (3);
4230 return true;
4231
4232 case CONST:
4233 *total = COSTS_N_INSNS (3);
4234 return true;
4235
4236 case LABEL_REF:
4237 case SYMBOL_REF:
4238 *total = COSTS_N_INSNS (0);
4239 return true;
4240
4241 case CONST_DOUBLE:
4242 *total = COSTS_N_INSNS (5);
4243 return true;
4244
4245 case FLOAT_EXTEND:
4246 case FLOAT_TRUNCATE:
4247 case FLOAT:
4248 case UNSIGNED_FLOAT:
4249 case FIX:
4250 case UNSIGNED_FIX:
4251 *total = COSTS_N_INSNS (7);
4252 return true;
4253
4254 case PLUS:
4255 if (mode == TImode)
4256 {
4257 *total = COSTS_N_INSNS (9);
4258 return true;
4259 }
4260 break;
4261
4262 case MULT:
4263 cost =
4264 GET_CODE (XEXP (x, 0)) ==
4265 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4266 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4267 {
4268 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4269 {
4270 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4271 cost = COSTS_N_INSNS (14);
4272 if ((val & 0xffff) == 0)
4273 cost = COSTS_N_INSNS (9);
4274 else if (val > 0 && val < 0x10000)
4275 cost = COSTS_N_INSNS (11);
4276 }
4277 }
4278 *total = cost;
4279 return true;
4280 case DIV:
4281 case UDIV:
4282 case MOD:
4283 case UMOD:
4284 *total = COSTS_N_INSNS (20);
4285 return true;
4286 case ROTATE:
4287 case ROTATERT:
4288 case ASHIFT:
4289 case ASHIFTRT:
4290 case LSHIFTRT:
4291 *total = COSTS_N_INSNS (4);
4292 return true;
4293 case UNSPEC:
4294 if (XINT (x, 1) == UNSPEC_CONVERT)
4295 *total = COSTS_N_INSNS (0);
4296 else
4297 *total = COSTS_N_INSNS (4);
4298 return true;
4299 }
4300 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4301 if (GET_MODE_CLASS (mode) == MODE_INT
4302 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4303 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4304 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4305 *total = cost;
4306 return true;
4307}
4308
1bd43494 4309static enum machine_mode
4310spu_unwind_word_mode (void)
644459d0 4311{
1bd43494 4312 return SImode;
644459d0 4313}
4314
4315/* Decide whether we can make a sibling call to a function. DECL is the
4316 declaration of the function being targeted by the call and EXP is the
4317 CALL_EXPR representing the call. */
4318static bool
4319spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4320{
4321 return decl && !TARGET_LARGE_MEM;
4322}
4323
4324/* We need to correctly update the back chain pointer and the Available
 4325 Stack Size (which is in the second slot of the sp register). */
4326void
4327spu_allocate_stack (rtx op0, rtx op1)
4328{
4329 HOST_WIDE_INT v;
4330 rtx chain = gen_reg_rtx (V4SImode);
4331 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4332 rtx sp = gen_reg_rtx (V4SImode);
4333 rtx splatted = gen_reg_rtx (V4SImode);
4334 rtx pat = gen_reg_rtx (TImode);
4335
4336 /* copy the back chain so we can save it back again. */
4337 emit_move_insn (chain, stack_bot);
4338
4339 op1 = force_reg (SImode, op1);
4340
4341 v = 0x1020300010203ll;
4342 emit_move_insn (pat, immed_double_const (v, v, TImode));
4343 emit_insn (gen_shufb (splatted, op1, op1, pat));
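  /* The pattern above splats the allocation size (word 0 of op1) into
     all four word slots, so the vector subtraction below updates the
     stack pointer (slot 0) and the Available Stack Size (slot 1) in
     one instruction.  */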
4344
4345 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4346 emit_insn (gen_subv4si3 (sp, sp, splatted));
4347
4348 if (flag_stack_check)
4349 {
4350 rtx avail = gen_reg_rtx(SImode);
4351 rtx result = gen_reg_rtx(SImode);
4352 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4353 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4354 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4355 }
4356
4357 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4358
4359 emit_move_insn (stack_bot, chain);
4360
4361 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4362}
4363
4364void
4365spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4366{
4367 static unsigned char arr[16] =
4368 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
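  /* This shufb control word splats word 0 of its first operand into
     every word of the result.  */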
4369 rtx temp = gen_reg_rtx (SImode);
4370 rtx temp2 = gen_reg_rtx (SImode);
4371 rtx temp3 = gen_reg_rtx (V4SImode);
4372 rtx temp4 = gen_reg_rtx (V4SImode);
4373 rtx pat = gen_reg_rtx (TImode);
4374 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4375
4376 /* Restore the backchain from the first word, sp from the second. */
4377 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4378 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4379
4380 emit_move_insn (pat, array_to_constant (TImode, arr));
4381
4382 /* Compute Available Stack Size for sp */
4383 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4384 emit_insn (gen_shufb (temp3, temp, temp, pat));
4385
4386 /* Compute Available Stack Size for back chain */
4387 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4388 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4389 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4390
4391 emit_insn (gen_addv4si3 (sp, sp, temp3));
4392 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4393}
4394
4395static void
4396spu_init_libfuncs (void)
4397{
4398 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4399 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4400 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4401 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4402 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4403 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4404 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4405 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4406 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4407 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4408 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4409
4410 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4411 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 4412
4413 set_optab_libfunc (smul_optab, TImode, "__multi3");
4414 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4415 set_optab_libfunc (smod_optab, TImode, "__modti3");
4416 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4417 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4418 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 4419}
4420
4421/* Make a subreg, stripping any existing subreg. We could possibly just
4422 call simplify_subreg, but in this case we know what we want. */
4423rtx
4424spu_gen_subreg (enum machine_mode mode, rtx x)
4425{
4426 if (GET_CODE (x) == SUBREG)
4427 x = SUBREG_REG (x);
4428 if (GET_MODE (x) == mode)
4429 return x;
4430 return gen_rtx_SUBREG (mode, x, 0);
4431}
4432
4433static bool
fb80456a 4434spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 4435{
4436 return (TYPE_MODE (type) == BLKmode
4437 && ((type) == 0
4438 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4439 || int_size_in_bytes (type) >
4440 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4441}
4442\f
4443/* Create the built-in types and functions */
4444
4445struct spu_builtin_description spu_builtins[] = {
4446#define DEF_BUILTIN(fcode, icode, name, type, params) \
4447 {fcode, icode, name, type, params, NULL_TREE},
4448#include "spu-builtins.def"
4449#undef DEF_BUILTIN
4450};
4451
4452static void
4453spu_init_builtins (void)
4454{
4455 struct spu_builtin_description *d;
4456 unsigned int i;
4457
4458 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4459 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4460 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4461 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4462 V4SF_type_node = build_vector_type (float_type_node, 4);
4463 V2DF_type_node = build_vector_type (double_type_node, 2);
4464
4465 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4466 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4467 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4468 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4469
c4ecce0c 4470 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 4471
4472 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4473 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4474 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4475 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4476 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4477 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4478 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4479 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4480 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4481 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4482 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4483 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4484
4485 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4486 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4487 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4488 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4489 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4490 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4491 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4492 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4493
4494 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4495 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4496
4497 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4498
4499 spu_builtin_types[SPU_BTI_PTR] =
4500 build_pointer_type (build_qualified_type
4501 (void_type_node,
4502 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4503
4504 /* For each builtin we build a new prototype. The tree code will make
4505 sure nodes are shared. */
4506 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4507 {
4508 tree p;
4509 char name[64]; /* build_function will make a copy. */
4510 int parm;
4511
4512 if (d->name == 0)
4513 continue;
4514
5dfbd18f 4515 /* Find last parm. */
644459d0 4516 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 4517 ;
644459d0 4518
4519 p = void_list_node;
4520 while (parm > 1)
4521 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4522
4523 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4524
4525 sprintf (name, "__builtin_%s", d->name);
4526 d->fndecl =
4527 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4528 NULL, NULL_TREE);
a76866d3 4529 if (d->fcode == SPU_MASK_FOR_LOAD)
4530 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 4531
4532 /* These builtins don't throw. */
4533 TREE_NOTHROW (d->fndecl) = 1;
644459d0 4534 }
4535}
4536
cf31d486 4537void
4538spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4539{
4540 static unsigned char arr[16] =
4541 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4542
4543 rtx temp = gen_reg_rtx (Pmode);
4544 rtx temp2 = gen_reg_rtx (V4SImode);
4545 rtx temp3 = gen_reg_rtx (V4SImode);
4546 rtx pat = gen_reg_rtx (TImode);
4547 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4548
4549 emit_move_insn (pat, array_to_constant (TImode, arr));
4550
4551 /* Restore the sp. */
4552 emit_move_insn (temp, op1);
4553 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4554
4555 /* Compute available stack size for sp. */
4556 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4557 emit_insn (gen_shufb (temp3, temp, temp, pat));
4558
4559 emit_insn (gen_addv4si3 (sp, sp, temp3));
4560 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4561}
4562
644459d0 4563int
4564spu_safe_dma (HOST_WIDE_INT channel)
4565{
4566 return (channel >= 21 && channel <= 27);
4567}
4568
4569void
4570spu_builtin_splats (rtx ops[])
4571{
4572 enum machine_mode mode = GET_MODE (ops[0]);
4573 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4574 {
4575 unsigned char arr[16];
4576 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4577 emit_move_insn (ops[0], array_to_constant (mode, arr));
4578 }
5df189be 4579 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
644459d0 4580 {
4581 rtvec v = rtvec_alloc (4);
4582 RTVEC_ELT (v, 0) = ops[1];
4583 RTVEC_ELT (v, 1) = ops[1];
4584 RTVEC_ELT (v, 2) = ops[1];
4585 RTVEC_ELT (v, 3) = ops[1];
4586 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4587 }
4588 else
4589 {
4590 rtx reg = gen_reg_rtx (TImode);
4591 rtx shuf;
4592 if (GET_CODE (ops[1]) != REG
4593 && GET_CODE (ops[1]) != SUBREG)
4594 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4595 switch (mode)
4596 {
4597 case V2DImode:
4598 case V2DFmode:
4599 shuf =
4600 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4601 TImode);
4602 break;
4603 case V4SImode:
4604 case V4SFmode:
4605 shuf =
4606 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4607 TImode);
4608 break;
4609 case V8HImode:
4610 shuf =
4611 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4612 TImode);
4613 break;
4614 case V16QImode:
4615 shuf =
4616 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4617 TImode);
4618 break;
4619 default:
4620 abort ();
4621 }
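      /* Each constant above replicates the element in the preferred
         slot of ops[1] across every element of the result.  */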
4622 emit_move_insn (reg, shuf);
4623 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4624 }
4625}
4626
4627void
4628spu_builtin_extract (rtx ops[])
4629{
4630 enum machine_mode mode;
4631 rtx rot, from, tmp;
4632
4633 mode = GET_MODE (ops[1]);
4634
4635 if (GET_CODE (ops[2]) == CONST_INT)
4636 {
4637 switch (mode)
4638 {
4639 case V16QImode:
4640 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4641 break;
4642 case V8HImode:
4643 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4644 break;
4645 case V4SFmode:
4646 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4647 break;
4648 case V4SImode:
4649 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4650 break;
4651 case V2DImode:
4652 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4653 break;
4654 case V2DFmode:
4655 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4656 break;
4657 default:
4658 abort ();
4659 }
4660 return;
4661 }
4662
4663 from = spu_gen_subreg (TImode, ops[1]);
4664 rot = gen_reg_rtx (TImode);
4665 tmp = gen_reg_rtx (SImode);
4666
4667 switch (mode)
4668 {
4669 case V16QImode:
4670 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4671 break;
4672 case V8HImode:
4673 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4674 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4675 break;
4676 case V4SFmode:
4677 case V4SImode:
4678 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4679 break;
4680 case V2DImode:
4681 case V2DFmode:
4682 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4683 break;
4684 default:
4685 abort ();
4686 }
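  /* tmp is now the left byte rotation that brings element ops[2] into
     the scalar preferred slot; rotqby performs the rotation and
     spu_convert extracts the result.  */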
4687 emit_insn (gen_rotqby_ti (rot, from, tmp));
4688
4689 emit_insn (gen_spu_convert (ops[0], rot));
4690}
4691
4692void
4693spu_builtin_insert (rtx ops[])
4694{
4695 enum machine_mode mode = GET_MODE (ops[0]);
4696 enum machine_mode imode = GET_MODE_INNER (mode);
4697 rtx mask = gen_reg_rtx (TImode);
4698 rtx offset;
4699
4700 if (GET_CODE (ops[3]) == CONST_INT)
4701 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4702 else
4703 {
4704 offset = gen_reg_rtx (SImode);
4705 emit_insn (gen_mulsi3
4706 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4707 }
4708 emit_insn (gen_cpat
4709 (mask, stack_pointer_rtx, offset,
4710 GEN_INT (GET_MODE_SIZE (imode))));
4711 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4712}
4713
4714void
4715spu_builtin_promote (rtx ops[])
4716{
4717 enum machine_mode mode, imode;
4718 rtx rot, from, offset;
4719 HOST_WIDE_INT pos;
4720
4721 mode = GET_MODE (ops[0]);
4722 imode = GET_MODE_INNER (mode);
4723
4724 from = gen_reg_rtx (TImode);
4725 rot = spu_gen_subreg (TImode, ops[0]);
4726
4727 emit_insn (gen_spu_convert (from, ops[1]));
4728
4729 if (GET_CODE (ops[2]) == CONST_INT)
4730 {
4731 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4732 if (GET_MODE_SIZE (imode) < 4)
4733 pos += 4 - GET_MODE_SIZE (imode);
4734 offset = GEN_INT (pos & 15);
4735 }
4736 else
4737 {
4738 offset = gen_reg_rtx (SImode);
4739 switch (mode)
4740 {
4741 case V16QImode:
4742 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4743 break;
4744 case V8HImode:
4745 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4746 emit_insn (gen_addsi3 (offset, offset, offset));
4747 break;
4748 case V4SFmode:
4749 case V4SImode:
4750 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4751 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4752 break;
4753 case V2DImode:
4754 case V2DFmode:
4755 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4756 break;
4757 default:
4758 abort ();
4759 }
4760 }
4761 emit_insn (gen_rotqby_ti (rot, from, offset));
4762}
4763
4764void
4765spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4766{
4767 rtx shuf = gen_reg_rtx (V4SImode);
4768 rtx insn = gen_reg_rtx (V4SImode);
4769 rtx shufc;
4770 rtx insnc;
4771 rtx mem;
4772
4773 fnaddr = force_reg (SImode, fnaddr);
4774 cxt = force_reg (SImode, cxt);
4775
4776 if (TARGET_LARGE_MEM)
4777 {
4778 rtx rotl = gen_reg_rtx (V4SImode);
4779 rtx mask = gen_reg_rtx (V4SImode);
4780 rtx bi = gen_reg_rtx (SImode);
4781 unsigned char shufa[16] = {
4782 2, 3, 0, 1, 18, 19, 16, 17,
4783 0, 1, 2, 3, 16, 17, 18, 19
4784 };
4785 unsigned char insna[16] = {
4786 0x41, 0, 0, 79,
4787 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4788 0x60, 0x80, 0, 79,
4789 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4790 };
4791
4792 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4793 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4794
4795 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 4796 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 4797 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4798 emit_insn (gen_selb (insn, insnc, rotl, mask));
4799
4800 mem = memory_address (Pmode, tramp);
4801 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4802
4803 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4804 mem = memory_address (Pmode, plus_constant (tramp, 16));
4805 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4806 }
4807 else
4808 {
4809 rtx scxt = gen_reg_rtx (SImode);
4810 rtx sfnaddr = gen_reg_rtx (SImode);
4811 unsigned char insna[16] = {
4812 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4813 0x30, 0, 0, 0,
4814 0, 0, 0, 0,
4815 0, 0, 0, 0
4816 };
4817
4818 shufc = gen_reg_rtx (TImode);
4819 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4820
4821 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4822 fits 18 bits and the last 4 are zeros. This will be true if
 4823 the stack pointer is initialized to 0x3fff0 at program start;
4824 otherwise the ila instruction will be garbage. */
4825
4826 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4827 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4828 emit_insn (gen_cpat
4829 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4830 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4831 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4832
4833 mem = memory_address (Pmode, tramp);
4834 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4835
4836 }
4837 emit_insn (gen_sync ());
4838}
4839
4840void
4841spu_expand_sign_extend (rtx ops[])
4842{
4843 unsigned char arr[16];
4844 rtx pat = gen_reg_rtx (TImode);
4845 rtx sign, c;
4846 int i, last;
4847 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4848 if (GET_MODE (ops[1]) == QImode)
4849 {
4850 sign = gen_reg_rtx (HImode);
4851 emit_insn (gen_extendqihi2 (sign, ops[1]));
4852 for (i = 0; i < 16; i++)
4853 arr[i] = 0x12;
4854 arr[last] = 0x13;
4855 }
4856 else
4857 {
4858 for (i = 0; i < 16; i++)
4859 arr[i] = 0x10;
4860 switch (GET_MODE (ops[1]))
4861 {
4862 case HImode:
4863 sign = gen_reg_rtx (SImode);
4864 emit_insn (gen_extendhisi2 (sign, ops[1]));
4865 arr[last] = 0x03;
4866 arr[last - 1] = 0x02;
4867 break;
4868 case SImode:
4869 sign = gen_reg_rtx (SImode);
4870 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4871 for (i = 0; i < 4; i++)
4872 arr[last - i] = 3 - i;
4873 break;
4874 case DImode:
4875 sign = gen_reg_rtx (SImode);
4876 c = gen_reg_rtx (SImode);
4877 emit_insn (gen_spu_convert (c, ops[1]));
4878 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4879 for (i = 0; i < 8; i++)
4880 arr[last - i] = 7 - i;
4881 break;
4882 default:
4883 abort ();
4884 }
4885 }
4886 emit_move_insn (pat, array_to_constant (TImode, arr));
4887 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4888}
4889
4890/* expand vector initialization. If there are any constant parts,
4891 load constant parts first. Then load any non-constant parts. */
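/* As a hypothetical example, { x, 1, 2, 3 } with non-constant x is
   handled by loading the constant vector { 1, 1, 2, 3 } (empty slots
   filled with the first constant) and then inserting x into element 0.  */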
4892void
4893spu_expand_vector_init (rtx target, rtx vals)
4894{
4895 enum machine_mode mode = GET_MODE (target);
4896 int n_elts = GET_MODE_NUNITS (mode);
4897 int n_var = 0;
4898 bool all_same = true;
790c536c 4899 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4900 int i;
4901
4902 first = XVECEXP (vals, 0, 0);
4903 for (i = 0; i < n_elts; ++i)
4904 {
4905 x = XVECEXP (vals, 0, i);
4906 if (!CONSTANT_P (x))
4907 ++n_var;
4908 else
4909 {
4910 if (first_constant == NULL_RTX)
4911 first_constant = x;
4912 }
4913 if (i > 0 && !rtx_equal_p (x, first))
4914 all_same = false;
4915 }
4916
4917 /* if all elements are the same, use splats to repeat elements */
4918 if (all_same)
4919 {
4920 if (!CONSTANT_P (first)
4921 && !register_operand (first, GET_MODE (x)))
4922 first = force_reg (GET_MODE (first), first);
4923 emit_insn (gen_spu_splats (target, first));
4924 return;
4925 }
4926
4927 /* load constant parts */
4928 if (n_var != n_elts)
4929 {
4930 if (n_var == 0)
4931 {
4932 emit_move_insn (target,
4933 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4934 }
4935 else
4936 {
4937 rtx constant_parts_rtx = copy_rtx (vals);
4938
4939 gcc_assert (first_constant != NULL_RTX);
4940 /* fill empty slots with the first constant, this increases
4941 our chance of using splats in the recursive call below. */
4942 for (i = 0; i < n_elts; ++i)
4943 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4944 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4945
4946 spu_expand_vector_init (target, constant_parts_rtx);
4947 }
4948 }
4949
4950 /* load variable parts */
4951 if (n_var != 0)
4952 {
4953 rtx insert_operands[4];
4954
4955 insert_operands[0] = target;
4956 insert_operands[2] = target;
4957 for (i = 0; i < n_elts; ++i)
4958 {
4959 x = XVECEXP (vals, 0, i);
4960 if (!CONSTANT_P (x))
4961 {
4962 if (!register_operand (x, GET_MODE (x)))
4963 x = force_reg (GET_MODE (x), x);
4964 insert_operands[1] = x;
4965 insert_operands[3] = GEN_INT (i);
4966 spu_builtin_insert (insert_operands);
4967 }
4968 }
4969 }
4970}
6352eedf 4971
5474166e 4972/* Return insn index for the vector compare instruction for given CODE,
4973 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4974
4975static int
4976get_vec_cmp_insn (enum rtx_code code,
4977 enum machine_mode dest_mode,
4978 enum machine_mode op_mode)
4979
4980{
4981 switch (code)
4982 {
4983 case EQ:
4984 if (dest_mode == V16QImode && op_mode == V16QImode)
4985 return CODE_FOR_ceq_v16qi;
4986 if (dest_mode == V8HImode && op_mode == V8HImode)
4987 return CODE_FOR_ceq_v8hi;
4988 if (dest_mode == V4SImode && op_mode == V4SImode)
4989 return CODE_FOR_ceq_v4si;
4990 if (dest_mode == V4SImode && op_mode == V4SFmode)
4991 return CODE_FOR_ceq_v4sf;
4992 if (dest_mode == V2DImode && op_mode == V2DFmode)
4993 return CODE_FOR_ceq_v2df;
4994 break;
4995 case GT:
4996 if (dest_mode == V16QImode && op_mode == V16QImode)
4997 return CODE_FOR_cgt_v16qi;
4998 if (dest_mode == V8HImode && op_mode == V8HImode)
4999 return CODE_FOR_cgt_v8hi;
5000 if (dest_mode == V4SImode && op_mode == V4SImode)
5001 return CODE_FOR_cgt_v4si;
5002 if (dest_mode == V4SImode && op_mode == V4SFmode)
5003 return CODE_FOR_cgt_v4sf;
5004 if (dest_mode == V2DImode && op_mode == V2DFmode)
5005 return CODE_FOR_cgt_v2df;
5006 break;
5007 case GTU:
5008 if (dest_mode == V16QImode && op_mode == V16QImode)
5009 return CODE_FOR_clgt_v16qi;
5010 if (dest_mode == V8HImode && op_mode == V8HImode)
5011 return CODE_FOR_clgt_v8hi;
5012 if (dest_mode == V4SImode && op_mode == V4SImode)
5013 return CODE_FOR_clgt_v4si;
5014 break;
5015 default:
5016 break;
5017 }
5018 return -1;
5019}
5020
5021/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5022 DMODE is expected destination mode. This is a recursive function. */
5023
5024static rtx
5025spu_emit_vector_compare (enum rtx_code rcode,
5026 rtx op0, rtx op1,
5027 enum machine_mode dmode)
5028{
5029 int vec_cmp_insn;
5030 rtx mask;
5031 enum machine_mode dest_mode;
5032 enum machine_mode op_mode = GET_MODE (op1);
5033
5034 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5035
 5036 /* Floating point vector compare instructions use a V4SImode destination.
 5037 Double precision floating point vector compare instructions use a
 5038 V2DImode destination. Move the result to the appropriate mode later. */
5039 if (dmode == V4SFmode)
5040 dest_mode = V4SImode;
5041 else if (dmode == V2DFmode)
5042 dest_mode = V2DImode;
5043 else
5044 dest_mode = dmode;
5045
5046 mask = gen_reg_rtx (dest_mode);
5047 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5048
5049 if (vec_cmp_insn == -1)
5050 {
5051 bool swap_operands = false;
5052 bool try_again = false;
5053 switch (rcode)
5054 {
5055 case LT:
5056 rcode = GT;
5057 swap_operands = true;
5058 try_again = true;
5059 break;
5060 case LTU:
5061 rcode = GTU;
5062 swap_operands = true;
5063 try_again = true;
5064 break;
5065 case NE:
5066 /* Treat A != B as ~(A==B). */
5067 {
5068 enum insn_code nor_code;
5069 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5070 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5071 gcc_assert (nor_code != CODE_FOR_nothing);
5072 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5073 if (dmode != dest_mode)
5074 {
5075 rtx temp = gen_reg_rtx (dest_mode);
5076 convert_move (temp, mask, 0);
5077 return temp;
5078 }
5079 return mask;
5080 }
5081 break;
5082 case GE:
5083 case GEU:
5084 case LE:
5085 case LEU:
5086 /* Try GT/GTU/LT/LTU OR EQ */
5087 {
5088 rtx c_rtx, eq_rtx;
5089 enum insn_code ior_code;
5090 enum rtx_code new_code;
5091
5092 switch (rcode)
5093 {
5094 case GE: new_code = GT; break;
5095 case GEU: new_code = GTU; break;
5096 case LE: new_code = LT; break;
5097 case LEU: new_code = LTU; break;
5098 default:
5099 gcc_unreachable ();
5100 }
5101
5102 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5103 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5104
99bdde56 5105 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5106 gcc_assert (ior_code != CODE_FOR_nothing);
5107 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5108 if (dmode != dest_mode)
5109 {
5110 rtx temp = gen_reg_rtx (dest_mode);
5111 convert_move (temp, mask, 0);
5112 return temp;
5113 }
5114 return mask;
5115 }
5116 break;
5117 default:
5118 gcc_unreachable ();
5119 }
5120
5121 /* You only get two chances. */
5122 if (try_again)
5123 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5124
5125 gcc_assert (vec_cmp_insn != -1);
5126
5127 if (swap_operands)
5128 {
5129 rtx tmp;
5130 tmp = op0;
5131 op0 = op1;
5132 op1 = tmp;
5133 }
5134 }
5135
5136 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5137 if (dmode != dest_mode)
5138 {
5139 rtx temp = gen_reg_rtx (dest_mode);
5140 convert_move (temp, mask, 0);
5141 return temp;
5142 }
5143 return mask;
5144}
5145
5146
5147/* Emit vector conditional expression.
5148 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5149 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5150
5151int
5152spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5153 rtx cond, rtx cc_op0, rtx cc_op1)
5154{
5155 enum machine_mode dest_mode = GET_MODE (dest);
5156 enum rtx_code rcode = GET_CODE (cond);
5157 rtx mask;
5158
5159 /* Get the vector mask for the given relational operations. */
5160 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5161
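  /* selb picks bits from op1 where the comparison mask is ones and from
     op2 where it is zeros, giving the element-wise cond ? op1 : op2.  */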
 5162 emit_insn (gen_selb (dest, op2, op1, mask));
5163
5164 return 1;
5165}
5166
6352eedf 5167static rtx
5168spu_force_reg (enum machine_mode mode, rtx op)
5169{
5170 rtx x, r;
5171 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5172 {
5173 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5174 || GET_MODE (op) == BLKmode)
5175 return force_reg (mode, convert_to_mode (mode, op, 0));
5176 abort ();
5177 }
5178
5179 r = force_reg (GET_MODE (op), op);
5180 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5181 {
5182 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5183 if (x)
5184 return x;
5185 }
5186
5187 x = gen_reg_rtx (mode);
5188 emit_insn (gen_spu_convert (x, r));
5189 return x;
5190}
5191
5192static void
5193spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5194{
5195 HOST_WIDE_INT v = 0;
5196 int lsbits;
5197 /* Check the range of immediate operands. */
5198 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5199 {
5200 int range = p - SPU_BTI_7;
5df189be 5201
5202 if (!CONSTANT_P (op))
6352eedf 5203 error ("%s expects an integer literal in the range [%d, %d].",
5204 d->name,
5205 spu_builtin_range[range].low, spu_builtin_range[range].high);
5206
5207 if (GET_CODE (op) == CONST
5208 && (GET_CODE (XEXP (op, 0)) == PLUS
5209 || GET_CODE (XEXP (op, 0)) == MINUS))
5210 {
5211 v = INTVAL (XEXP (XEXP (op, 0), 1));
5212 op = XEXP (XEXP (op, 0), 0);
5213 }
5214 else if (GET_CODE (op) == CONST_INT)
5215 v = INTVAL (op);
5df189be 5216 else if (GET_CODE (op) == CONST_VECTOR
5217 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5218 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5219
5220 /* The default for v is 0 which is valid in every range. */
5221 if (v < spu_builtin_range[range].low
5222 || v > spu_builtin_range[range].high)
5223 error ("%s expects an integer literal in the range [%d, %d]. ("
5224 HOST_WIDE_INT_PRINT_DEC ")",
5225 d->name,
5226 spu_builtin_range[range].low, spu_builtin_range[range].high,
5227 v);
6352eedf 5228
5229 switch (p)
5230 {
5231 case SPU_BTI_S10_4:
5232 lsbits = 4;
5233 break;
5234 case SPU_BTI_U16_2:
 5235 /* This is only used in lqa and stqa. Even though the insns
5236 encode 16 bits of the address (all but the 2 least
5237 significant), only 14 bits are used because it is masked to
5238 be 16 byte aligned. */
5239 lsbits = 4;
5240 break;
5241 case SPU_BTI_S16_2:
5242 /* This is used for lqr and stqr. */
5243 lsbits = 2;
5244 break;
5245 default:
5246 lsbits = 0;
5247 }
5248
5249 if (GET_CODE (op) == LABEL_REF
5250 || (GET_CODE (op) == SYMBOL_REF
5251 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5252 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5253 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5254 d->name);
5255 }
5256}
5257
5258
5259static void
5df189be 5260expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5261 rtx target, rtx ops[])
5262{
5263 enum insn_code icode = d->icode;
5df189be 5264 int i = 0, a;
6352eedf 5265
5266 /* Expand the arguments into rtl. */
5267
5268 if (d->parm[0] != SPU_BTI_VOID)
5269 ops[i++] = target;
5270
5df189be 5271 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5272 {
5df189be 5273 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5274 if (arg == 0)
5275 abort ();
5276 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5277 }
5278}
5279
5280static rtx
5281spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5282 tree exp, rtx target)
6352eedf 5283{
5284 rtx pat;
5285 rtx ops[8];
5286 enum insn_code icode = d->icode;
5287 enum machine_mode mode, tmode;
5288 int i, p;
5289 tree return_type;
5290
 5291 /* Set up ops[] with values from the call expression. */
5df189be 5292 expand_builtin_args (d, exp, target, ops);
6352eedf 5293
5294 /* Handle the target operand which must be operand 0. */
5295 i = 0;
5296 if (d->parm[0] != SPU_BTI_VOID)
5297 {
5298
 5299 /* We prefer the mode specified for the match_operand; otherwise
5300 use the mode from the builtin function prototype. */
5301 tmode = insn_data[d->icode].operand[0].mode;
5302 if (tmode == VOIDmode)
5303 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5304
 5305 /* Try to use target, because not using it can lead to extra copies,
 5306 and when we are using all of the registers extra copies lead
 5307 to extra spills. */
5308 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5309 ops[0] = target;
5310 else
5311 target = ops[0] = gen_reg_rtx (tmode);
5312
5313 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5314 abort ();
5315
5316 i++;
5317 }
5318
a76866d3 5319 if (d->fcode == SPU_MASK_FOR_LOAD)
5320 {
5321 enum machine_mode mode = insn_data[icode].operand[1].mode;
5322 tree arg;
5323 rtx addr, op, pat;
5324
5325 /* get addr */
5df189be 5326 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5327 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5328 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5329 addr = memory_address (mode, op);
5330
5331 /* negate addr */
5332 op = gen_reg_rtx (GET_MODE (addr));
5333 emit_insn (gen_rtx_SET (VOIDmode, op,
5334 gen_rtx_NEG (GET_MODE (addr), addr)));
5335 op = gen_rtx_MEM (mode, op);
5336
5337 pat = GEN_FCN (icode) (target, op);
5338 if (!pat)
5339 return 0;
5340 emit_insn (pat);
5341 return target;
5342 }
5343
6352eedf 5344 /* Ignore align_hint, but still expand its args in case they have
5345 side effects. */
5346 if (icode == CODE_FOR_spu_align_hint)
5347 return 0;
5348
5349 /* Handle the rest of the operands. */
5350 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5351 {
5352 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5353 mode = insn_data[d->icode].operand[i].mode;
5354 else
5355 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5356
5357 /* mode can be VOIDmode here for labels */
5358
5359 /* For specific intrinsics with an immediate operand, e.g.,
5360 si_ai(), we sometimes need to convert the scalar argument to a
5361 vector argument by splatting the scalar. */
5362 if (VECTOR_MODE_P (mode)
5363 && (GET_CODE (ops[i]) == CONST_INT
5364 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5365 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5366 {
5367 if (GET_CODE (ops[i]) == CONST_INT)
5368 ops[i] = spu_const (mode, INTVAL (ops[i]));
5369 else
5370 {
5371 rtx reg = gen_reg_rtx (mode);
5372 enum machine_mode imode = GET_MODE_INNER (mode);
5373 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5374 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5375 if (imode != GET_MODE (ops[i]))
5376 ops[i] = convert_to_mode (imode, ops[i],
5377 TYPE_UNSIGNED (spu_builtin_types
5378 [d->parm[i]]));
5379 emit_insn (gen_spu_splats (reg, ops[i]));
5380 ops[i] = reg;
5381 }
5382 }
5383
5df189be 5384 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5385
6352eedf 5386 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5387 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 5388 }
5389
5390 switch (insn_data[icode].n_operands)
5391 {
5392 case 0:
5393 pat = GEN_FCN (icode) (0);
5394 break;
5395 case 1:
5396 pat = GEN_FCN (icode) (ops[0]);
5397 break;
5398 case 2:
5399 pat = GEN_FCN (icode) (ops[0], ops[1]);
5400 break;
5401 case 3:
5402 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5403 break;
5404 case 4:
5405 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5406 break;
5407 case 5:
5408 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5409 break;
5410 case 6:
5411 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5412 break;
5413 default:
5414 abort ();
5415 }
5416
5417 if (!pat)
5418 abort ();
5419
5420 if (d->type == B_CALL || d->type == B_BISLED)
5421 emit_call_insn (pat);
5422 else if (d->type == B_JUMP)
5423 {
5424 emit_jump_insn (pat);
5425 emit_barrier ();
5426 }
5427 else
5428 emit_insn (pat);
5429
5430 return_type = spu_builtin_types[d->parm[0]];
5431 if (d->parm[0] != SPU_BTI_VOID
5432 && GET_MODE (target) != TYPE_MODE (return_type))
5433 {
5434 /* target is the return value. It should always be the mode of
5435 the builtin function prototype. */
5436 target = spu_force_reg (TYPE_MODE (return_type), target);
5437 }
5438
5439 return target;
5440}
5441
5442rtx
5443spu_expand_builtin (tree exp,
5444 rtx target,
5445 rtx subtarget ATTRIBUTE_UNUSED,
5446 enum machine_mode mode ATTRIBUTE_UNUSED,
5447 int ignore ATTRIBUTE_UNUSED)
5448{
5df189be 5449 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 5450 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 5451 struct spu_builtin_description *d;
5452
5453 if (fcode < NUM_SPU_BUILTINS)
5454 {
5455 d = &spu_builtins[fcode];
5456
5df189be 5457 return spu_expand_builtin_1 (d, exp, target);
6352eedf 5458 }
5459 abort ();
5460}
5461
e99f512d 5462/* Implement targetm.vectorize.builtin_mul_widen_even. */
5463static tree
5464spu_builtin_mul_widen_even (tree type)
5465{
e99f512d 5466 switch (TYPE_MODE (type))
5467 {
5468 case V8HImode:
5469 if (TYPE_UNSIGNED (type))
5470 return spu_builtins[SPU_MULE_0].fndecl;
5471 else
5472 return spu_builtins[SPU_MULE_1].fndecl;
5473 break;
5474 default:
5475 return NULL_TREE;
5476 }
5477}
5478
5479/* Implement targetm.vectorize.builtin_mul_widen_odd. */
5480static tree
5481spu_builtin_mul_widen_odd (tree type)
5482{
5483 switch (TYPE_MODE (type))
5484 {
5485 case V8HImode:
5486 if (TYPE_UNSIGNED (type))
5487 return spu_builtins[SPU_MULO_1].fndecl;
5488 else
5489 return spu_builtins[SPU_MULO_0].fndecl;
5490 break;
5491 default:
5492 return NULL_TREE;
5493 }
5494}
5495
a76866d3 5496/* Implement targetm.vectorize.builtin_mask_for_load. */
5497static tree
5498spu_builtin_mask_for_load (void)
5499{
5500 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5501 gcc_assert (d);
5502 return d->fndecl;
5503}
5df189be 5504
a28df51d 5505/* Implement targetm.vectorize.builtin_vectorization_cost. */
5506static int
5507spu_builtin_vectorization_cost (bool runtime_test)
5508{
5509 /* If the branch of the runtime test is taken - i.e. - the vectorized
5510 version is skipped - this incurs a misprediction cost (because the
5511 vectorized version is expected to be the fall-through). So we subtract
becfaa62 5512 the latency of a mispredicted branch from the costs that are incurred
a28df51d 5513 when the vectorized version is executed. */
5514 if (runtime_test)
5515 return -19;
5516 else
5517 return 0;
5518}
5519
0e87db76 5520/* Return true iff a data reference of TYPE can reach vector alignment (16)
 5521 after applying N iterations. This routine does not determine
 5522 how many iterations are required to reach the desired alignment. */
5523
5524static bool
a9f1838b 5525spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 5526{
5527 if (is_packed)
5528 return false;
5529
5530 /* All other types are naturally aligned. */
5531 return true;
5532}
5533
d52fd16a 5534/* Count the total number of instructions in each pipe and return the
5535 maximum, which is used as the Minimum Iteration Interval (MII)
 5536 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
 5537 -2 means the instruction can go in either pipe0 or pipe1. */
5538static int
5539spu_sms_res_mii (struct ddg *g)
5540{
5541 int i;
5542 unsigned t[4] = {0, 0, 0, 0};
5543
5544 for (i = 0; i < g->num_nodes; i++)
5545 {
5546 rtx insn = g->nodes[i].insn;
5547 int p = get_pipe (insn) + 2;
5548
5549 assert (p >= 0);
5550 assert (p < 4);
5551
5552 t[p]++;
5553 if (dump_file && INSN_P (insn))
5554 fprintf (dump_file, "i%d %s %d %d\n",
5555 INSN_UID (insn),
5556 insn_data[INSN_CODE(insn)].name,
5557 p, t[p]);
5558 }
5559 if (dump_file)
5560 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5561
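  /* Instructions counted in t[0] can issue in either pipe, so the MII
     is bounded below by each pipe's dedicated count and by half of the
     total pipe0/pipe1 instruction count, rounded up.  */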
5562 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5563}
5564
5565
5df189be 5566void
5567spu_init_expanders (void)
5568{
 5569 /* The hard frame pointer register is only 128-bit aligned when
5570 * frame_pointer_needed is true. We don't know that until we're
5571 * expanding the prologue. */
5572 if (cfun)
5573 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 5574}
5575
5576static enum machine_mode
5577spu_libgcc_cmp_return_mode (void)
5578{
5579
 5580/* For SPU, word mode is TImode, so it is better to use SImode
5581 for compare returns. */
5582 return SImode;
5583}
5584
5585static enum machine_mode
5586spu_libgcc_shift_count_mode (void)
5587{
 5588/* For SPU, word mode is TImode, so it is better to use SImode
5589 for shift counts. */
5590 return SImode;
5591}