/* Copyright (C) 2006-2013 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "sched-int.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type. (Implemented with V16QI_type_node) */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},            /* SPU_BTI_7     */
  {-0x40ll, 0x3fll},            /* SPU_BTI_S7    */
  {0ll, 0x7fll},                /* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},          /* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},        /* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},              /* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},        /* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},        /* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},      /* SPU_BTI_S16_2 */
  {0ll, 0xffffll},              /* SPU_BTI_U16   */
  {0ll, 0x3ffffll},             /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},             /* SPU_BTI_U18   */
};
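
/* Added note (illustrative, not from the original file): each row gives
   the inclusive range of one instruction immediate field.  For example,
   SPU_BTI_S10 covers the signed 10-bit field used by instructions such
   as "ai" (add immediate): -0x200 .. 0x1ff, i.e. -512 .. 511.  A builtin
   argument is validated against its row with a check of roughly this
   shape:

     if (INTVAL (op) < range.low || INTVAL (op) > range.high)
       error ("integer literal out of range");
*/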
/* Target specific attribute specifications. */

char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static int get_pipe (rtx insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);
/* Which instruction set architecture to use. */
int spu_arch;

/* Which cpu are we tuning for. */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
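
/* Added worked example: SPU instructions are 4 bytes, so the hint must
   precede the branch by 8 insns = 32 bytes.  Letting the compiler fill
   in up to 2 nops leaves (8*4) - (2*4) = 24 bytes, i.e. it looks for 6
   real instructions worth of distance by default; with N allowed nops
   the same formula gives 8*4 - N*4.  */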
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);
/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
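
/* Added note (illustrative): __ea qualifies pointers into the PPU's
   effective-address space.  The macro just picks the RTL pointer mode
   from the -mea32/-mea64 setting, e.g.

     rtx ea_ptr = gen_reg_rtx (EAmode);  / * SImode under -mea32,
                                             DImode under -mea64 * /
*/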
/* Define the structure for the machine field in struct function. */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses. */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'. */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
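
/* Added note (illustrative): every SPU register is a 16-byte quadword,
   so a SUBREG is only cheap when both modes land in the same slot.
   E.g. (subreg:SI (reg:SF) 0) passes (both sizes <= 4) and
   (subreg:V4SI (reg:TI) 0) passes (both >= 16), but
   (subreg:SI (reg:DI) 0) is rejected: SImode and DImode occupy
   different parts of the quadword.  */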
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load. */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  start &= 7;
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  /* Need 2 loads. */
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}
    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }

  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  rtx addr1;
	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes < offset + 16)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case GE:
	    op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case LE:
	    op1 = GEN_INT (val);
	    code = LT;
	    break;
	  case LEU:
	    op1 = GEN_INT (val);
	    code = LTU;
	    break;
	  default:
	    break;
	  }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 0;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 1;
	  reverse_test = 1;
	}
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 1;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 0;
	  reverse_test = 1;
	}
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      comp_mode = DImode;
      break;
    case TImode:
      index = 4;
      comp_mode = TImode;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
      index = 7;
      comp_mode = op_mode;
      break;
    case V8HImode:
      index = 8;
      comp_mode = op_mode;
      break;
    case V4SImode:
      index = 9;
      comp_mode = op_mode;
      break;
    case V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    default:
      abort ();
    }

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode
	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = op1;
	  op1 = op0;
	  op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  if (eq_rtx == 0)
	    abort ();
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

static rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable ();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		  break;
		case SPU_ILA:
		  fprintf (file, "a");
		  break;
		case SPU_ILH:
		  fprintf (file, "h");
		  break;
		case SPU_ILHU:
		  fprintf (file, "hu");
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      if (info == 1)
		fprintf (file, "b");
	      else if (info == 2)
		fprintf (file, "h");
	      else if (info == 4)
		fprintf (file, "w");
	      else if (info == 8)
		fprintf (file, "d");
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
		fprintf (file, "a");
	      else if (xcode == HIGH)
		fprintf (file, "hu");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		case SPU_ILA:
		  break;
		case SPU_ILH:
		case SPU_ILHU:
		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	      break;
	    case IC_FSMBI:
	      constant_to_array (mode, x, arr);
	      val = 0;
	      for (i = 0; i < 16; i++)
		{
		  val <<= 1;
		  val |= arr[i] & 1;
		}
	      print_operand (file, GEN_INT (val), 0);
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
	      break;
	    case IC_IL1s:
	      if (GET_CODE (x) == CONST_VECTOR)
		x = CONST_VECTOR_ELT (x, 0);
	      output_addr_const (file, x);
	      if (xcode == HIGH)
		fprintf (file, "@h");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'C':
      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      else
	gcc_unreachable ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val & 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val & 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  if (!reload_completed && !reload_in_progress)
    abort ();

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
	cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation. */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
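
/* Added example: if three registers need a save slot, the loop above
   yields 3 * 0x10 = 48 bytes -- every saved register occupies a full
   16-byte quadword on SPU regardless of the mode it held.  */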
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0))
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |  saved regs | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |   frame     |
         +-------------+
         | back chain  |
   SP -> +-------------+

*/
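
/* Added worked example (not from the original file): for a function
   with 32 bytes of locals, two saved registers, and no outgoing or
   pretend args, the prologue below computes roughly

     total_size = 32 (vars) + 2*16 (saved regs) + 0 + 0
                + STACK_POINTER_OFFSET

   and, since total_size <= 2000, stores the back chain first and then
   decrements $sp with a single add-immediate when the constant
   satisfies the K constraint.  */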
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
    cfun->machine->pic_reg = pic_offset_table_rtx;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!crtl->is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && cfun->machine->pic_reg)
    {
      rtx pic_reg = cfun->machine->pic_reg;
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1)  */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!crtl->is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      emit_jump_insn (gen__return ());
    }
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
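
/* Added note: count == 0 serves __builtin_return_address (0).  Walking
   outer frames is not supported on this target, so any non-zero count
   simply yields 0.  */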
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
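
/* Added usage sketch: spu_const (V4SImode, 1) yields
   (const_vector:V4SI [1 1 1 1]); this is how spu_expand_prologue builds
   the splatted stack-size constant for its cgt.v4si stack check.  */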
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant(mode, arr);
}
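
/* Added usage sketch (illustrative): the four ints are laid out
   big-endian, one per word slot, so

     spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)

   produces the identity byte pattern often used as a shuffle control
   for shufb-style operations.  */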
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (CALL_P(INSN) \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily. */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
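
/* Added note: both macros reuse existing rtx flag bits ("unchanging"
   and "in_struct") rather than adding new fields, so a typical check is

     if (JUMP_P (insn) && !HINTED_P (insn))
       ... consider emitting an hbr for this branch ...

   RTL_FLAG_CHECK2/3 only verify, in checking-enabled builds, that the
   flag is accessed on one of the listed rtx codes.  */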
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm. */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;

  /* We need to handle JUMP_TABLE_DATA separately.  */
  if (JUMP_TABLE_DATA_P (insn))
    {
      new_insn = emit_insn_after (gen_lnop(), insn);
      recog_memoized (new_insn);
      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
      return;
    }

  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop. */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  || (a1 - a0 == 4))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
		  length += 4;
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
/* Routines for branch hints. */

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
		      int distance, sbitmap blocks)
{
  rtx branch_label = 0;
  rtx hint;
  rtx insn;
  rtx table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too */
  if (distance > 600)
    return;

  /* If we have a Basic block note, emit it after the basic block note.  */
  if (NOTE_INSN_BASIC_BLOCK_P (before))
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  INSN_LOCATION (hint) = INSN_LOCATION (branch);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
      else
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit */
      insn = emit_insn_before (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops. */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
	{
	  insn =
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
	}

      /* Make sure any nops inserted aren't scheduled before the hint. */
      insn = emit_insn_after (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
	{
	  insn = emit_insn_before (gen_blockage (), branch);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
	}
    }
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (JUMP_P (branch))
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* ASM GOTOs. */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
	return NULL;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint.  */
	      int prob = XINT (note, 0);
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (CALL_P (branch))
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address.
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.  */
static void
insert_hbrp_for_ilb_runout (rtx first)
{
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	  before_4 = insn;
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions. */
	if (before_16 == 0 && addr + length >= 14 * 4)
	  before_16 = insn;

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	  {
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary. */
	    if (before_4 == 0 && addr > 0)
	      {
		before_4 = insn;
		insert_lnop_after |= 1;
	      }
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
	      {
		before_16 = insn;
		insert_lnop_after |= 2;
	      }
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;
	  }

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	  {
	    if (addr < hbrp_addr0)
	      hbrp_addr0 = addr;
	    else if (addr < hbrp_addr1)
	      hbrp_addr1 = addr;
	  }

	if (CALL_P (insn) || JUMP_P (insn))
	  {
	    if (HINTED_P (insn))
	      return;

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	       insns. */
	    if (addr > 10 * 4)
	      SCHED_ON_EVEN_P (insn) = 1;
	  }

	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
	  return;

	if (addr + length >= 32 * 4)
	  {
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		  {
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		  {
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
							(before_16)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    return;
	  }
      }
    else if (BARRIER_P (insn))
      return;
}
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target.  */
static void
insert_hbrp (void)
{
  rtx insn;
  if (TARGET_SAFE_HINTS)
    {
      shorten_branches (get_insns ());
      /* Insert hbrp at beginning of function */
      insn = next_active_insn (get_insns ());
      if (insn)
	insert_hbrp_for_ilb_runout (insn);
      /* Insert hbrp after hinted targets. */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
    }
}
static int in_spu_reorg;

static void
spu_var_tracking (void)
{
  if (flag_var_tracking)
    {
      df_analyze ();
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
      df_finish_pass (false);
    }
}
/* Insert branch hints.  There are no branch optimizations after this
   pass, so it's safe to set our branch hints now. */
static void
spu_machine_dependent_reorg (void)
{
  sbitmap blocks;
  basic_block bb;
  rtx branch, insn;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, required_dist = 0;
  int i;
  unsigned int j;

  if (!TARGET_BRANCH_HINTS || optimize == 0)
    {
      /* We still do it for unoptimized code because an external
         function might have hinted a call or return. */
      compute_bb_for_insn ();
      insert_hbrp ();
      pad_bb ();
      spu_var_tracking ();
      free_bb_for_insn ();
      return;
    }

  blocks = sbitmap_alloc (last_basic_block);
  bitmap_clear (blocks);

  in_spu_reorg = 1;
  compute_bb_for_insn ();

  /* (Re-)discover loops so that bb->loop_father can be used
     in the analysis below.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  compact_blocks ();

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
                                    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      branch = 0;
      if (spu_bb_info[i].prop_jump)
        {
          branch = spu_bb_info[i].prop_jump;
          branch_target = get_branch_target (branch);
          branch_addr = INSN_ADDRESSES (INSN_UID (branch));
          required_dist = spu_hint_dist;
        }
      /* Search from end of a block to beginning.  In this loop, find
         jumps which need a branch and emit them only when:
         - it's an indirect branch and we're at the insn which sets
           the register
         - we're at an insn that will invalidate the hint. e.g., a
           call, another hint insn, inline asm that clobbers $hbr, and
           some inlined operations (divmodsi4).  Don't consider jumps
           because they are only at the end of a block and are
           considered when we are deciding whether to propagate
         - we're getting too far away from the branch.  The hbr insns
           only have a signed 10 bit offset
         We go back as far as possible so the branch will be considered
         for propagation when we get to the beginning of the block. */
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
        {
          if (INSN_P (insn))
            {
              insn_addr = INSN_ADDRESSES (INSN_UID (insn));
              if (branch
                  && ((GET_CODE (branch_target) == REG
                       && set_of (branch_target, insn) != NULL_RTX)
                      || insn_clobbers_hbr (insn)
                      || branch_addr - insn_addr > 600))
                {
                  rtx next = NEXT_INSN (insn);
                  int next_addr = INSN_ADDRESSES (INSN_UID (next));
                  if (insn != BB_END (bb)
                      && branch_addr - next_addr >= required_dist)
                    {
                      if (dump_file)
                        fprintf (dump_file,
                                 "hint for %i in block %i before %i\n",
                                 INSN_UID (branch), bb->index,
                                 INSN_UID (next));
                      spu_emit_branch_hint (next, branch, branch_target,
                                            branch_addr - next_addr, blocks);
                    }
                  branch = 0;
                }

              /* JUMP_P will only be true at the end of a block.  When
                 branch is already set it means we've previously decided
                 to propagate a hint for that branch into this block. */
              if (CALL_P (insn) || (JUMP_P (insn) && !branch))
                {
                  branch = 0;
                  if ((branch_target = get_branch_target (insn)))
                    {
                      branch = insn;
                      branch_addr = insn_addr;
                      required_dist = spu_hint_dist;
                    }
                }
            }
          if (insn == BB_HEAD (bb))
            break;
        }

      if (branch)
        {
          /* If we haven't emitted a hint for this branch yet, it might
             be profitable to emit it in one of the predecessor blocks,
             especially for loops.  */
          rtx bbend;
          basic_block prev = 0, prop = 0, prev2 = 0;
          int loop_exit = 0, simple_loop = 0;
          int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));

          for (j = 0; j < EDGE_COUNT (bb->preds); j++)
            if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
              prev = EDGE_PRED (bb, j)->src;
            else
              prev2 = EDGE_PRED (bb, j)->src;

          for (j = 0; j < EDGE_COUNT (bb->succs); j++)
            if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
              loop_exit = 1;
            else if (EDGE_SUCC (bb, j)->dest == bb)
              simple_loop = 1;

          /* If this branch is a loop exit then propagate to previous
             fallthru block. This catches the cases when it is a simple
             loop or when there is an initial branch into the loop. */
          if (prev && (loop_exit || simple_loop)
              && bb_loop_depth (prev) <= bb_loop_depth (bb))
            prop = prev;

          /* If there is only one adjacent predecessor, don't propagate
             outside this loop.  */
          else if (prev && single_pred_p (bb)
                   && prev->loop_father == bb->loop_father)
            prop = prev;

          /* If this is the JOIN block of a simple IF-THEN then
             propagate the hint to the HEADER block. */
          else if (prev && prev2
                   && EDGE_COUNT (bb->preds) == 2
                   && EDGE_COUNT (prev->preds) == 1
                   && EDGE_PRED (prev, 0)->src == prev2
                   && prev2->loop_father == bb->loop_father
                   && GET_CODE (branch_target) != REG)
            prop = prev;

          /* Don't propagate when:
             - this is a simple loop and the hint would be too far
             - this is not a simple loop and there are 16 insns in
               this block already
             - the predecessor block ends in a branch that will be
               hinted
             - the predecessor block ends in an insn that invalidates
               the hint */
          if (prop
              && prop->index >= 0
              && (bbend = BB_END (prop))
              && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
              (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
              && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
            {
              if (dump_file)
                fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
                         "for %i (loop_exit %i simple_loop %i dist %i)\n",
                         bb->index, prop->index, bb_loop_depth (bb),
                         INSN_UID (branch), loop_exit, simple_loop,
                         branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

              spu_bb_info[prop->index].prop_jump = branch;
              spu_bb_info[prop->index].bb_index = i;
            }
          else if (branch_addr - next_addr >= required_dist)
            {
              if (dump_file)
                fprintf (dump_file, "hint for %i in block %i before %i\n",
                         INSN_UID (branch), bb->index,
                         INSN_UID (NEXT_INSN (insn)));
              spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
                                    branch_addr - next_addr, blocks);
            }
          branch = 0;
        }
    }
  free (spu_bb_info);

  if (!bitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok. */
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again. */
  schedule_insns ();
  df_finish_pass (true);

  insert_hbrp ();

  pad_bb ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
      {
        /* Adjust the LABEL_REF in a hint when we have inserted a nop
           between its branch label and the branch.  We don't move the
           label because GCC expects it at the beginning of the block. */
        rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
        rtx label_ref = XVECEXP (unspec, 0, 0);
        rtx label = XEXP (label_ref, 0);
        rtx branch;
        int offset = 0;
        for (branch = NEXT_INSN (label);
             !JUMP_P (branch) && !CALL_P (branch);
             branch = NEXT_INSN (branch))
          if (NONJUMP_INSN_P (branch))
            offset += get_attr_length (branch);
        if (offset > 0)
          XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
      }

  spu_var_tracking ();

  loop_optimizer_finalize ();

  free_bb_for_insn ();

  in_spu_reorg = 0;
}
/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
          || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
    case TYPE_IPREFETCH:
      return 1;
    default:
      abort ();
    }
}
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls. */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled. */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder(). */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb. */
static int spu_ls_first;
static int prev_ls_clock;
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}

static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                int max_ready ATTRIBUTE_UNUSED)
{
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
    {
      /* When any block might be at least 8-byte aligned, assume they
         will all be at least 8-byte aligned to make sure dual issue
         works out correctly. */
      spu_sched_length = 0;
    }
  spu_ls_first = INT_MAX;
  clock_var = -1;
  prev_ls_clock = -1;
  pipe0_clock = -1;
  pipe1_clock = -1;
  prev_clock_var = -1;
  prev_priority = -1;
}
static int
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
                          int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
{
  int len;
  int p;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || (len = get_attr_length (insn)) == 0)
    return more;

  spu_sched_length += len;

  /* Reset on inline asm */
  if (INSN_CODE (insn) == -1)
    {
      spu_ls_first = INT_MAX;
      pipe0_clock = -1;
      pipe1_clock = -1;
      return 0;
    }
  p = get_pipe (insn);
  if (p == 0)
    pipe0_clock = clock_var;
  else
    pipe1_clock = clock_var;

  if (in_spu_reorg)
    {
      if (clock_var - prev_ls_clock > 1
          || INSN_CODE (insn) == CODE_FOR_iprefetch)
        spu_ls_first = INT_MAX;
      if (uses_ls_unit (insn))
        {
          if (spu_ls_first == INT_MAX)
            spu_ls_first = spu_sched_length;
          prev_ls_clock = clock_var;
        }

      /* The scheduler hasn't inserted the nop, but we will later on.
         Include those nops in spu_sched_length. */
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
        spu_sched_length += 4;
      prev_clock_var = clock_var;

      /* more is -1 when called from spu_sched_reorder for new insns
         that don't have INSN_PRIORITY */
      if (more >= 0)
        prev_priority = INSN_PRIORITY (insn);
    }

  /* Always try issuing more insns.  spu_sched_reorder will decide
     when the cycle should be advanced. */
  return 1;
}
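
/* A note on the alignment arithmetic above (an inference from the
   code, not a comment from the original source): spu_sched_length
   counts bytes and every real SPU insn is 4 bytes, so
   (spu_sched_length & 7) is 0 when the next insn would start a
   dual-issue pair and 4 when it would land on the odd half of a
   pair.  The "+= 4" accounts for the nop that will be inserted later
   to restore even alignment.  */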
/* This function is called for both TARGET_SCHED_REORDER and
   TARGET_SCHED_REORDER2. */
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                   rtx *ready, int *nreadyp, int clock)
{
  int i, nready = *nreadyp;
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
  rtx insn;

  clock_var = clock;

  if (nready <= 0 || pipe1_clock >= clock)
    return 0;

  /* Find any rtl insns that don't generate assembly insns and schedule
     them first. */
  for (i = nready - 1; i >= 0; i--)
    {
      insn = ready[i];
      if (INSN_CODE (insn) == -1
          || INSN_CODE (insn) == CODE_FOR_blockage
          || (INSN_P (insn) && get_attr_length (insn) == 0))
        {
          ready[i] = ready[nready - 1];
          ready[nready - 1] = insn;
          return 1;
        }
    }

  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
  for (i = 0; i < nready; i++)
    if (INSN_CODE (ready[i]) != -1)
      {
        insn = ready[i];
        switch (get_attr_type (insn))
          {
          default:
          case TYPE_MULTI0:
          case TYPE_CONVERT:
          case TYPE_FX2:
          case TYPE_FX3:
          case TYPE_SPR:
          case TYPE_NOP:
          case TYPE_FXB:
          case TYPE_FPD:
          case TYPE_FP6:
          case TYPE_FP7:
            pipe_0 = i;
            break;
          case TYPE_LOAD:
          case TYPE_STORE:
            pipe_ls = i;
          case TYPE_LNOP:
          case TYPE_SHUF:
          case TYPE_BR:
          case TYPE_MULTI1:
          case TYPE_HBR:
            pipe_1 = i;
            break;
          case TYPE_IPREFETCH:
            pipe_hbrp = i;
            break;
          }
      }

  /* In the first scheduling phase, schedule loads and stores together
     to increase the chance they will get merged during postreload CSE. */
  if (!reload_completed && pipe_ls >= 0)
    {
      insn = ready[pipe_ls];
      ready[pipe_ls] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }

  /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
  if (pipe_hbrp >= 0)
    pipe_1 = pipe_hbrp;

  /* When we have loads/stores in every cycle of the last 15 insns and
     we are about to schedule another load/store, emit an hbrp insn
     instead. */
  if (in_spu_reorg
      && spu_sched_length - spu_ls_first >= 4 * 15
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
    {
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
      recog_memoized (insn);
      if (pipe0_clock < clock)
        PUT_MODE (insn, TImode);
      spu_sched_variable_issue (file, verbose, insn, -1);
      return 0;
    }

  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to affect it. */
  if (in_spu_reorg && spu_dual_nops < 10)
    {
      /* When we are at an even address and we are not issuing nops to
         improve scheduling then we need to advance the cycle. */
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
          && (spu_dual_nops == 0
              || (pipe_1 != -1
                  && prev_priority >
                  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
        return 0;

      /* When at an odd address, schedule the highest priority insn
         without considering pipeline. */
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
          && (spu_dual_nops == 0
              || (prev_priority >
                  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
        return 1;
    }

  /* We haven't issued a pipe0 insn yet this cycle, if there is a
     pipe0 insn in the ready list, schedule it. */
  if (pipe0_clock < clock && pipe_0 >= 0)
    schedule_i = pipe_0;

  /* Either we've scheduled a pipe0 insn already or there is no pipe0
     insn to schedule.  Put a pipe1 insn at the front of the ready list. */
  else
    schedule_i = pipe_1;

  if (schedule_i > -1)
    {
      insn = ready[schedule_i];
      ready[schedule_i] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }
  return 0;
}
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  rtx set;

  /* The blockage pattern is used to prevent instructions from being
     moved across it and has no cost. */
  if (INSN_CODE (insn) == CODE_FOR_blockage
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
    return 0;

  if ((INSN_P (insn) && get_attr_length (insn) == 0)
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
    return 0;

  /* Make sure hbrps are spread out. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 8;

  /* Make sure hints and hbrps are 2 cycles apart. */
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
       || INSN_CODE (insn) == CODE_FOR_hbr)
      && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
          || INSN_CODE (dep_insn) == CODE_FOR_hbr))
    return 2;

  /* An hbrp has no real dependency on other insns. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 0;

  /* Assuming that it is unlikely an argument register will be used in
     the first cycle of the called function, we reduce the cost for
     slightly better scheduling of dep_insn.  When not hinted, the
     mispredicted branch would hide the cost as well.  */
  if (CALL_P (insn))
    {
      rtx target = get_branch_target (insn);
      if (GET_CODE (target) != REG || !set_of (target, insn))
        return cost - 2;
      return cost;
    }

  /* And when returning from a function, let's assume the return values
     are completed sooner too. */
  if (CALL_P (dep_insn))
    return cost - 2;

  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued. */
  if (INSN_CODE (insn) == CODE_FOR__return
      && (set = single_set (dep_insn))
      && GET_CODE (SET_DEST (set)) == REG
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
    return 20;

  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;

  return cost;
}
/* Create a CONST_DOUBLE from a string. */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
          || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
          || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
             || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of ARR form a constant that can
   be generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and
   PSTART represent the size and offset of the instruction to use. */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
        if (!run)
          {
            start = i;
            if (arr[i] == 3)
              run = 1;
            else if (arr[i] == 2 && arr[i + 1] == 3)
              run = 2;
            else if (arr[i] == 0)
              {
                while (arr[i + run] == run && i + run < 16)
                  run++;
                if (run != 4 && run != 8)
                  cpat = 0;
              }
            else
              cpat = 0;
            if ((i & (run - 1)) != 0)
              cpat = 0;
            i += run;
          }
        else
          cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
        run = 1;
      if (prun)
        *prun = run;
      if (pstart)
        *pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
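
/* Illustrative example (inferred from the logic above, not part of
   the original source): cwd with offset 4 produces the shuffle bytes

     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f

   i.e. the identity pattern 0x10..0x1f with the run 00..03 inserted
   at a 4-aligned position.  For that pattern cpat_info returns 1
   with *prun == 4 and *pstart == 4.  */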
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok. */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      /* We can never know if the resulting address fits in 18 bits and can be
         loaded with ila.  For now, assume the address will not overflow if
         the displacement is "small" (fits 'K' constraint).  */
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
        {
          rtx sym = XEXP (XEXP (op, 0), 0);
          rtx cst = XEXP (XEXP (op, 0), 1);

          if (GET_CODE (sym) == SYMBOL_REF
              && GET_CODE (cst) == CONST_INT
              && satisfies_constraint_K (cst))
            return IC_IL1s;
        }
      return IC_IL2s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
        if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
            && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
          return IC_POOL;
      /* Fall through. */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical. */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
        for (j = 0; j < 4; j++)
          if (arr[j] != arr[i + j])
            repeated = 0;

      if (repeated)
        {
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);

          if (which_immediate_load (val) != SPU_NONE)
            return IC_IL1;
        }

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction. */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
        if (arr[i] != 0 && repeat == 0)
          repeat = arr[i];
        else if (arr[i] != 0 && arr[i] != repeat)
          fsmbi = 0;
      if (fsmbi)
        return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
        return IC_CPAT;

      if (repeated)
        return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
        return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
        {
          val = trunc_int_for_mode (val, QImode);
          if (val >= -0x200 && val <= 0x1ff)
            return SPU_ORBI;
        }
    }
  return SPU_NONE;
}
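
/* Illustrative examples (worked out from the ranges checked above,
   not part of the original source):

     which_logical_immediate (0x1ff)       => SPU_ORI   10-bit immediate
     which_logical_immediate (0x1234)      => SPU_IOHL
     which_logical_immediate (0x00120012)  => SPU_ORHI  repeated halfword
     which_logical_immediate (0x12121212)  => SPU_ORBI  repeated byte
     which_logical_immediate (0x12345678)  => SPU_NONE  */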
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs. */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
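
/* Illustrative example (not in the original source): a V8HImode
   vector with every element 0x0005 becomes the byte pair 00 05
   repeated eight times, so the repetition check passes, val becomes
   5, and arith_immediate_p (op, V8HImode, -16, 15) is true; such a
   constant fits the small immediate field of insns like ahi.  */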
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same. */
int
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
  enum machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode */
  gcc_assert (mode == SFmode);
  if (mode == SFmode)
    {
      int exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0
             && exp >= low && exp <= high;
    }
  return 0;
}
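
/* Worked example (illustrative, not in the original source): the
   SFmode constant 2.0f has the bit pattern 0x40000000.  Its mantissa
   bits (val & 0x007fffff) are zero and (0x40000000 >> 23) - 127 == 1,
   so exp2_immediate_p returns true whenever 1 lies in [LOW, HIGH].  */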
/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
static int
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;
  tree decl;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    {
      rtx plus = XEXP (x, 0);
      rtx op0 = XEXP (plus, 0);
      rtx op1 = XEXP (plus, 1);
      if (GET_CODE (op1) == CONST_INT)
        x = op0;
    }

  return (GET_CODE (x) == SYMBOL_REF
          && (decl = SYMBOL_REF_DECL (x)) != 0
          && TREE_CODE (decl) == VAR_DECL
          && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}
/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match. */
static bool
spu_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Reject any __ea qualified reference.  These can't appear in
     instructions but must be forced to the constant pool. */
  if (for_each_rtx (&x, ea_symbol_ref, 0))
    return false;

  /* V4SI with all identical symbols is valid. */
  if (!flag_pic
      && mode == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
           && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
           && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return false;
  return true;
}
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode. */
static bool
spu_legitimate_address_p (enum machine_mode mode,
                          rtx x, bool reg_ok_strict)
{
  int aligned = GET_MODE_SIZE (mode) >= 16;
  if (aligned
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case SYMBOL_REF:
    case CONST:
      /* Keep __ea references until reload so that spu_expand_mov can see them
         in MEMs. */
      if (ea_symbol_ref (&x, 0))
        return !reload_in_progress && !reload_completed;
      return !TARGET_LARGE_MEM;

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      if (!REG_P (x))
        return false;
      /* Fall through. */

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
        rtx op0 = XEXP (x, 0);
        rtx op1 = XEXP (x, 1);
        if (GET_CODE (op0) == SUBREG)
          op0 = XEXP (op0, 0);
        if (GET_CODE (op1) == SUBREG)
          op1 = XEXP (op1, 0);
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == CONST_INT
            && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
                /* If virtual registers are involved, the displacement will
                   change later on anyway, so checking would be premature.
                   Reload will make sure the final displacement after
                   register elimination is OK. */
                || op0 == arg_pointer_rtx
                || op0 == frame_pointer_rtx
                || op0 == virtual_stack_vars_rtx)
            && (!aligned || (INTVAL (op1) & 15) == 0))
          return true;
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == REG
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
          return true;
      }
      break;

    default:
      break;
    }
  return false;
}
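
/* Illustrative examples (not in the original source) for a 16-byte
   mode, where ALIGNED is set:

     (reg)                       valid
     (plus reg reg)              valid, alignment of the sum is ignored
     (plus reg (const_int 32))   valid, offset is 16-byte aligned
     (plus reg (const_int 8))    invalid; lqd/stqd would silently drop
                                 the low 4 bits of the offset  */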
/* Like spu_legitimate_address_p, except with named addresses. */
static bool
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
                                     bool reg_ok_strict, addr_space_t as)
{
  if (as == ADDR_SPACE_EA)
    return (REG_P (x) && (GET_MODE (x) == EAmode));

  else if (as != ADDR_SPACE_GENERIC)
    gcc_unreachable ();

  return spu_legitimate_address_p (mode, x, reg_ok_strict);
}
/* When the address is reg + const_int, force the const_int into a
   register. */
static rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx op0, op1;
  /* Make sure both operands are registers. */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
        {
          op0 = force_reg (Pmode, op0);
          mark_reg_pointer (op0, 128);
        }
      else if (GET_CODE (op0) != REG)
        op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
        {
          op1 = force_reg (Pmode, op1);
          mark_reg_pointer (op1, 128);
        }
      else if (GET_CODE (op1) != REG)
        op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
    }
  return x;
}
/* Like spu_legitimize_address, except with named address support. */
static rtx
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
                                   addr_space_t as)
{
  if (as != ADDR_SPACE_GENERIC)
    return x;

  return spu_legitimize_address (x, oldx, mode);
}
/* Reload reg + const_int for out-of-range displacements. */
rtx
spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
                               int opnum, int type)
{
  bool removed_and = false;

  if (GET_CODE (ad) == AND
      && CONST_INT_P (XEXP (ad, 1))
      && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
    {
      ad = XEXP (ad, 0);
      removed_and = true;
    }

  if (GET_CODE (ad) == PLUS
      && REG_P (XEXP (ad, 0))
      && CONST_INT_P (XEXP (ad, 1))
      && !(INTVAL (XEXP (ad, 1)) >= -0x2000
           && INTVAL (XEXP (ad, 1)) <= 0x1fff))
    {
      /* Unshare the sum. */
      ad = copy_rtx (ad);

      /* Reload the displacement. */
      push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
                   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);

      /* Add back AND for alignment if we stripped it. */
      if (removed_and)
        ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));

      return ad;
    }

  return NULL_RTX;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */
static tree
spu_handle_fndecl_attribute (tree * node,
                             tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle the "vector" attribute. */
static tree
spu_handle_vector_attribute (tree * node, tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type. */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;        /* No need to hang on to the attribute. */

  if (!result)
    warning (0, "%qE attribute ignored", name);
  else
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Return nonzero if FUNC is a naked function. */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!crtl->is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size ();
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  else
    gcc_unreachable ();
}
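
/* Worked example (illustrative, not in the original source): for a
   non-leaf function with a 64-byte frame and 32 bytes of outgoing
   args, sp_offset is STACK_POINTER_OFFSET, so eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields
   64 + 32 + STACK_POINTER_OFFSET.  */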
static rtx
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      enum machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
        {
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
                                                gen_rtx_REG (TImode,
                                                             FIRST_RETURN_REGNUM
                                                             + i),
                                                GEN_INT (UNITS_PER_WORD * i));
          byte_size -= UNITS_PER_WORD;
        }

      if (n < nregs)
        {
          smode =
            smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
          RTVEC_ELT (v, n) =
            gen_rtx_EXPR_LIST (VOIDmode,
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
                               GEN_INT (UNITS_PER_WORD * n));
        }
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
static rtx
spu_function_arg (cumulative_args_t cum_v,
                  enum machine_mode mode,
                  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int byte_size;

  if (*cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack. */
  if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      enum machine_mode smode;
      rtx gr_reg;
      if (byte_size < 4)
        byte_size = 4;
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
                                  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }
  else
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
}
static void
spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
                          const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
           ? 1
           : mode == BLKmode
           ? ((int_size_in_bytes (type) + 15) / 16)
           : mode == VOIDmode
           ? 1
           : HARD_REGNO_NREGS (cum, mode));
}
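
/* Illustrative example (not in the original source): a BLKmode
   struct argument of 20 bytes advances *cum by (20 + 15) / 16 == 2
   register slots, while a variable-sized type (whose TYPE_SIZE is
   not an INTEGER_CST) only advances it by 1, because such types are
   passed by reference.  */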
/* Variable sized types are passed by reference. */
static bool
spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
          void *__args __attribute__((__aligned(16)));
          void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes. */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (BUILTINS_LOCATION,
                TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (BUILTINS_LOCATION,
                       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (BUILTINS_LOCATION,
                       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  DECL_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it that way.  It is an
     internal type so hide the warnings from the user. */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element. */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info:
       the CUMULATIVE_ARGS for this function

     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer). */

static void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  valist = build_simple_mem_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area. */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (crtl->args.pretend_args_size > 0)
    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area. */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
                                       - STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
        && VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
                          gimple_seq * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  valist = build_simple_mem_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");

  /* if an object is dynamically sized, a pointer to it is passed
     instead of the object itself. */
  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
                                           false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* build conditional expression to calculate addr. The expression
     will be gimplified later. */
  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
                build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
                build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
                        unshare_expr (skip)));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
                fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
                unshare_expr (args));

  gimplify_assign (addr, tmp, pre_p);

  /* update VALIST.__args */
  tmp = fold_build_pointer_plus_hwi (addr, rsize);
  gimplify_assign (unshare_expr (args), tmp, pre_p);

  addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
                       addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameter.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers. */
static void
spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
                            tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *get_cumulative_args (cum);

      /* cum currently points to the last named argument, we want to
         start at the next argument. */
      spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
        {
          tmp = gen_frame_mem (V4SImode,
                               plus_constant (Pmode, virtual_incoming_args_rtx,
                                              offset));
          emit_move_insn (tmp,
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
          offset += 16;
        }
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}
/* This is called any time we inspect the alignment of a register for
   addresses. */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS. */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1. */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT. */
static int
store_with_one_insn_p (rtx mem)
{
  enum machine_mode mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  if (mode == BLKmode)
    return 0;
  if (GET_MODE_SIZE (mode) >= 16)
    return 1;
  /* Only static objects. */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types? */
      tree decl = MEM_EXPR (mem);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
    }
  return 0;
}
/* Return 1 when the address is not valid for a simple load and store as
   required by the '_mov*' patterns.  We could make this less strict
   for loads, but we prefer MEMs to look the same so they are more
   likely to be merged. */
static int
address_needs_split (rtx mem)
{
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
          || !(store_with_one_insn_p (mem)
               || mem_is_padded_component_ref (mem))))
    return 1;

  return 0;
}
static GTY(()) rtx cache_fetch;           /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty;     /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */

/* MEM is known to be an __ea qualified memory access.  Emit a call to
   fetch the ppu memory to local store, and return its address in local
   store. */

static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  if (is_store)
    {
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
      if (!cache_fetch_dirty)
        cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
                               2, ea_addr, EAmode, ndirty, SImode);
    }
  else
    {
      if (!cache_fetch)
        cache_fetch = init_one_libfunc ("__cache_fetch");
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
                               1, ea_addr, EAmode);
    }
}
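
/* For reference (an assumption inferred from the library calls above,
   not a signature taken from the original sources), the software
   cache runtime routines behave roughly like:

     void *__cache_fetch (__ea void *ea);
     void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty);

   both returning the local-store address of the cache line holding
   EA, with the dirty variant additionally marking N_BYTES_DIRTY bytes
   for write-back.  */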
/* Like ea_load_store, but do the cache tag comparison and, for stores,
   dirty bit marking, inline.

   The cache control data structure is an array of

   struct __cache_tag_array
     {
        unsigned int tag_lo[4];
        unsigned int tag_hi[4];
        void *data_pointer[4];
        int reserved[4];
        vector unsigned short dirty_bits[4];
     }  */

static void
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  rtx ea_addr_si;
  HOST_WIDE_INT v;
  rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
  rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
  rtx index_mask = gen_reg_rtx (SImode);
  rtx tag_arr = gen_reg_rtx (Pmode);
  rtx splat_mask = gen_reg_rtx (TImode);
  rtx splat = gen_reg_rtx (V4SImode);
  rtx splat_hi = NULL_RTX;
  rtx tag_index = gen_reg_rtx (Pmode);
  rtx block_off = gen_reg_rtx (SImode);
  rtx tag_addr = gen_reg_rtx (Pmode);
  rtx tag = gen_reg_rtx (V4SImode);
  rtx cache_tag = gen_reg_rtx (V4SImode);
  rtx cache_tag_hi = NULL_RTX;
  rtx cache_ptrs = gen_reg_rtx (TImode);
  rtx cache_ptrs_si = gen_reg_rtx (SImode);
  rtx tag_equal = gen_reg_rtx (V4SImode);
  rtx tag_equal_hi = NULL_RTX;
  rtx tag_eq_pack = gen_reg_rtx (V4SImode);
  rtx tag_eq_pack_si = gen_reg_rtx (SImode);
  rtx eq_index = gen_reg_rtx (SImode);
  rtx bcomp, hit_label, hit_ref, cont_label, insn;

  if (spu_ea_model != 32)
    {
      splat_hi = gen_reg_rtx (V4SImode);
      cache_tag_hi = gen_reg_rtx (V4SImode);
      tag_equal_hi = gen_reg_rtx (V4SImode);
    }

  emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
  emit_move_insn (tag_arr, tag_arr_sym);
  v = 0x0001020300010203LL;
  emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
  ea_addr_si = ea_addr;
  if (spu_ea_model != 32)
    ea_addr_si = convert_to_mode (SImode, ea_addr, 1);

  /* tag_index = ea_addr & (tag_array_size - 128)  */
  emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));

  /* splat ea_addr to all 4 slots. */
  emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
  /* Similarly for high 32 bits of ea_addr. */
  if (spu_ea_model != 32)
    emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));

  /* block_off = ea_addr & 127  */
  emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));

  /* tag_addr = tag_arr + tag_index  */
  emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));

  /* Read cache tags. */
  emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
  if (spu_ea_model != 32)
    emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
                                               plus_constant (Pmode,
                                                              tag_addr, 16)));

  /* tag = ea_addr & -128  */
  emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));

  /* Read all four cache data pointers. */
  emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
                                           plus_constant (Pmode,
                                                          tag_addr, 32)));

  /* Compare tags. */
  emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
  if (spu_ea_model != 32)
    {
      emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
      emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
    }

  /* At most one of the tags compare equal, so tag_equal has one
     32-bit slot set to all 1's, with the other slots all zero.
     gbb picks off low bit from each byte in the 128-bit registers,
     so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
     we have a hit. */
  emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
  emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));

  /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
  emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));

  /* Allowing us to rotate the corresponding cache data pointer to slot0.
     (rotating eq_index mod 16 bytes). */
  emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
  emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));

  /* Add block offset to form final data address. */
  emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));

  /* Check that we did hit. */
  hit_label = gen_label_rtx ();
  hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
  bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                                      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
                                                            hit_ref, pc_rtx)));
  /* Say that this branch is very likely to happen. */
  v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
  add_int_reg_note (insn, REG_BR_PROB, v);

  ea_load_store (mem, is_store, ea_addr, data_addr);
  cont_label = gen_label_rtx ();
  emit_jump_insn (gen_jump (cont_label));
  emit_barrier ();

  emit_label (hit_label);

  if (is_store)
    {
      HOST_WIDE_INT v_hi;
      rtx dirty_bits = gen_reg_rtx (TImode);
      rtx dirty_off = gen_reg_rtx (SImode);
      rtx dirty_128 = gen_reg_rtx (TImode);
      rtx neg_block_off = gen_reg_rtx (SImode);

      /* Set up mask with one dirty bit per byte of the mem we are
         writing, starting from top bit. */
      v_hi = v = -1;
      v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
      if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
        {
          v_hi = v;
          v = 0;
        }
      emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));

      /* Form index into cache dirty_bits.  eq_index is one of
         0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
         0x40, 0x50, 0x60 or 0x70 which just happens to be the
         offset to each of the four dirty_bits elements. */
      emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));

      emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));

      /* Rotate bit mask to proper bit. */
      emit_insn (gen_negsi2 (neg_block_off, block_off));
      emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
      emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));

      /* Or in the new dirty bits. */
      emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));

      /* Store. */
      emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
    }

  emit_label (cont_label);
}
static rtx
expand_ea_mem (rtx mem, bool is_store)
{
  rtx ea_addr;
  rtx data_addr = gen_reg_rtx (Pmode);
  rtx new_mem;

  ea_addr = force_reg (EAmode, XEXP (mem, 0));
  if (optimize_size || optimize == 0)
    ea_load_store (mem, is_store, ea_addr, data_addr);
  else
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);

  if (ea_alias_set == -1)
    ea_alias_set = new_alias_set ();

  /* We generate a new MEM RTX to refer to the copy of the data
     in the cache.  We do not copy memory attributes (except the
     alignment) from the original MEM, as they may no longer apply
     to the cache copy. */
  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
  set_mem_alias_set (new_mem, ea_alias_set);
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));

  return new_mem;
}
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    {
      /* Perform the move in the destination SUBREG's inner mode. */
      ops[0] = SUBREG_REG (ops[0]);
      mode = GET_MODE (ops[0]);
      ops[1] = gen_lowpart_common (mode, ops[1]);
      gcc_assert (ops[1]);
    }

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      enum machine_mode imode = int_mode_for_mode (GET_MODE (from));

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
                  && GET_MODE_CLASS (imode) == MODE_INT
                  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
        imode = SImode;
      if (imode != GET_MODE (from))
        from = gen_rtx_SUBREG (imode, from, 0);

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
        {
          enum insn_code icode = convert_optab_handler (trunc_optab,
                                                        mode, imode);
          emit_insn (GEN_FCN (icode) (ops[0], from));
        }
      else
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register. */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
        return spu_split_immediate (ops);
      return 0;
    }

  /* Catch the SImode immediates greater than 0x7fffffff, and sign
     extend them. */
  if (GET_CODE (ops[1]) == CONST_INT)
    {
      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
      if (val != INTVAL (ops[1]))
        {
          emit_move_insn (ops[0], GEN_INT (val));
          return 1;
        }
    }
  if (MEM_P (ops[0]))
    {
      if (MEM_ADDR_SPACE (ops[0]))
        ops[0] = expand_ea_mem (ops[0], true);
      return spu_split_store (ops);
    }
  if (MEM_P (ops[1]))
    {
      if (MEM_ADDR_SPACE (ops[1]))
        ops[1] = expand_ea_mem (ops[1], false);
      return spu_split_load (ops);
    }

  return 0;
}
static void
spu_convert_move (rtx dst, rtx src)
{
  enum machine_mode mode = GET_MODE (dst);
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
  rtx reg;
  gcc_assert (GET_MODE (src) == TImode);
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
  emit_insn (gen_rtx_SET (VOIDmode, reg,
                          gen_rtx_TRUNCATE (int_mode,
                                            gen_rtx_LSHIFTRT (TImode, src,
                                                              GEN_INT (int_mode == DImode ? 64 : 96)))));
  if (int_mode != mode)
    {
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
      emit_move_insn (dst, reg);
    }
}
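
/* Illustrative note (inferred from the code above, not from the
   original source): for an SImode DST the logical right shift by 96
   bits moves the word in the preferred slot (the most significant
   word of the big-endian TImode SRC) into the low 32 bits, where the
   TRUNCATE can extract it; DImode uses a shift of 64 for the same
   reason.  */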
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
   the address from SRC and SRC+16.  Return a REG or CONST_INT that
   specifies how many bytes to rotate the loaded registers, plus any
   extra from EXTRA_ROTQBY.  The address and rotate amounts are
   normalized to improve merging of loads and rotate computations. */
static rtx
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
{
  rtx addr = XEXP (src, 0);
  rtx p0, p1, rot, addr0, addr1;
  int rot_amt;

  rot = 0;
  rot_amt = 0;

  if (MEM_ALIGN (src) >= 128)
    /* Address is already aligned; simply perform a TImode load. */ ;
  else if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (!reg_aligned_for_addr (p0))
        {
          if (REG_P (p1) && !reg_aligned_for_addr (p1))
            {
              rot = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (rot, p0, p1));
            }
          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              if (INTVAL (p1) > 0
                  && REG_POINTER (p0)
                  && INTVAL (p1) * BITS_PER_UNIT
                     < REGNO_POINTER_ALIGN (REGNO (p0)))
                {
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = p0;
                }
              else
                {
                  rtx x = gen_reg_rtx (SImode);
                  emit_move_insn (x, p1);
                  if (!spu_arith_operand (p1, SImode))
                    p1 = x;
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = gen_rtx_PLUS (Pmode, p0, x);
                }
            }
          else
            rot = p0;
        }
      else
        {
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              rot_amt = INTVAL (p1) & 15;
              if (INTVAL (p1) & -16)
                {
                  p1 = GEN_INT (INTVAL (p1) & -16);
                  addr = gen_rtx_PLUS (SImode, p0, p1);
                }
              else
                addr = p0;
            }
          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
            rot = p1;
        }
    }
  else if (REG_P (addr))
    {
      if (!reg_aligned_for_addr (addr))
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
          if (rot_amt & -16)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (rot_amt & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
        }
      else
        {
          rot = gen_reg_rtx (Pmode);
          emit_move_insn (rot, addr);
        }
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    {
      rot = gen_reg_rtx (Pmode);
      emit_move_insn (rot, addr);
    }

  rot_amt += extra_rotby;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      rtx x = gen_reg_rtx (SImode);
      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
      rot = x;
      rot_amt = 0;
    }
  if (!rot && rot_amt)
    rot = GEN_INT (rot_amt);

  addr0 = copy_rtx (addr);
  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));

  if (dst1)
    {
      addr1 = plus_constant (SImode, copy_rtx (addr), 16);
      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
    }

  return rot;
}
int
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot;
  int rot_amt;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[1], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[1]))
    {
      ops[1] = change_address (ops[1], TImode, addr);
      load = gen_reg_rtx (TImode);
      emit_insn (gen__movti (load, ops[1]));
      spu_convert_move (ops[0], load);
      return 1;
    }

  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;

  load = gen_reg_rtx (TImode);
  rot = spu_expand_load (load, 0, ops[1], rot_amt);

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));

  spu_convert_move (ops[0], load);
  return 1;
}
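
/* Illustrative example (not in the original source): an SImode load
   from an unaligned address held in a register typically expands to

     lqd    rt, 0(reg)      load the enclosing quadword
     rotqby rt, rt, reg     rotate the requested word into slot 0

   where the rotate amount is the REG returned by spu_expand_load
   above (the low 4 bits of the address).  */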
int
spu_split_store (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx reg;
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[0], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[0]))
    {
      reg = gen_reg_rtx (TImode);
      emit_insn (gen_spu_convert (reg, ops[1]));
      ops[0] = change_address (ops[0], TImode, addr);
      emit_move_insn (ops[0], reg);
      return 1;
    }

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
       */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (p1) & 15);
	  if (reg_aligned_for_addr (p0))
	    {
	      p1 = GEN_INT (INTVAL (p1) & -16);
	      if (p1 == const0_rtx)
		addr = p0;
	      else
		addr = gen_rtx_PLUS (SImode, p0, p1);
	    }
	  else
	    {
	      rtx x = gen_reg_rtx (SImode);
	      emit_move_insn (x, p1);
	      addr = gen_rtx_PLUS (SImode, p0, x);
	    }
	}
    }
  else if (REG_P (addr))
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;			/* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
	p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST
	       && GET_CODE (XEXP (addr, 0)) == PLUS
	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
	{
	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
	  if ((v & -16) != 0)
	    addr = gen_rtx_CONST (Pmode,
				  gen_rtx_PLUS (Pmode,
						XEXP (XEXP (addr, 0), 0),
						GEN_INT (v & -16)));
	  else
	    addr = XEXP (XEXP (addr, 0), 0);
	  p1_lo = GEN_INT (v & 15);
	}
      else if (GET_CODE (addr) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (addr) & 15);
	  addr = GEN_INT (INTVAL (addr) & -16);
	}
      else
	{
	  p1_lo = gen_reg_rtx (SImode);
	  emit_move_insn (p1_lo, addr);
	}
    }

  gcc_assert (aform == 0 || aform == 1);
  reg = gen_reg_rtx (TImode);

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
         We don't because we want this load to be optimized away if
         possible, and copying the flags will prevent that in certain
         cases, e.g. consider the volatile flag. */

      rtx pat = gen_reg_rtx (TImode);
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_aligned_for_addr (p0))
	p0 = stack_pointer_rtx;
      if (!p1_lo)
	p1_lo = const0_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
	emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
	abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_ashlti3
	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));

  smem = change_address (ops[0], TImode, copy_rtx (addr));
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types.  */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
  return 1;
}
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded. */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded. */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
/* Parse the -mfixed-range= option string.  */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler. */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
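
/* Usage sketch (illustrative): the option accepts one or more
   dash-separated ranges, e.g.

       -mfixed-range=80-85
       -mfixed-range=80-85,90-92

   where each endpoint is a register name or number as accepted by
   decode_reg_name; every register in a range becomes fixed and
   call-used, so the allocator never assigns it.  */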
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction. */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi. */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction. */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
	  && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
	    (GET_CODE (ops[1]) ==
	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}
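
/* Worked example (illustrative): for a 4-byte element at quadword byte
   offset 4 (ops[2] = 4, ops[3] = 4, so isize = 4 and shift = 0) the
   mask built above is

       dst = { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }

   i.e. a shufb control word that passes the old quadword through
   (selectors 16..31 pick the second source) except in bytes 4..7,
   which select bytes 0..3 of the first source -- the same pattern the
   hardware cwd instruction would generate.  */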
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array. */
void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
	  && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
	val = const_double_to_hwint (x);
      else
	val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      /* Splat the constant across the whole array. */
      for (j = 0, i = first + 1; i < 16; i++)
	{
	  arr[i] = arr[j];
	  j = (j == first) ? 0 : j + 1;
	}
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
	{
	  elt = CONST_VECTOR_ELT (x, i);
	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
	    {
	      if (GET_CODE (elt) == CONST_DOUBLE)
		val = const_double_to_hwint (elt);
	      else
		val = INTVAL (elt);
	      first = GET_MODE_SIZE (mode) - 1;
	      if (first + i * GET_MODE_SIZE (mode) > 16)
		abort ();
	      for (j = first; j >= 0; j--)
		{
		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
		  val >>= 8;
		}
	    }
	}
    }
  else
    gcc_unreachable ();
}
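
/* Worked example (illustrative): constant_to_array (SImode,
   GEN_INT (0x01020304), arr) writes the big-endian bytes 01 02 03 04
   into arr[0..3] and then splats them across the rest:

       arr = { 01,02,03,04, 01,02,03,04, 01,02,03,04, 01,02,03,04 }  */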
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3].  */
rtx
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
	val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
	high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
	val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      for (i = 0, val = 0; i < 8; i++)
	val = (val << 8) | arr[i];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
	val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}
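
/* Illustrative round trip: with arr = { 01,02,03,04, ... } as produced
   by the constant_to_array example above, array_to_constant (SImode, arr)
   reads arr[0..3] and returns GEN_INT (0x01020304), inverting the
   conversion.  */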
static void
reloc_diagnostic (rtx x)
{
  tree decl = 0;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL. */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
    {
      location_t loc;
      /* We use last_assemble_variable_decl to get line information.  It's
         not always going to be right and might not even be close, but will
         be right for the more common cases. */
      if (!last_assemble_variable_decl || in_section == ctors_section)
	loc = DECL_SOURCE_LOCATION (decl);
      else
	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);

      if (TARGET_WARN_RELOC)
	warning_at (loc, 0,
		    "creating run-time relocation for %qD", decl);
      else
	error_at (loc,
		  "creating run-time relocation for %qD", decl);
    }
  else
    {
      if (TARGET_WARN_RELOC)
	warning_at (input_location, 0, "creating run-time relocation");
      else
	error_at (input_location, "creating run-time relocation");
    }
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
	       int opno ATTRIBUTE_UNUSED, int *total,
	       bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough. */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
	*total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
	{
	  *total = COSTS_N_INSNS (9);
	  return true;
	}
      break;

    case MULT:
      cost =
	GET_CODE (XEXP (x, 0)) ==
	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	      cost = COSTS_N_INSNS (14);
	      if ((val & 0xffff) == 0)
		cost = COSTS_N_INSNS (9);
	      else if (val > 0 && val < 0x10000)
		cost = COSTS_N_INSNS (11);
	    }
	}
      *total = cost;
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
	*total = COSTS_N_INSNS (0);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      break;
    }

  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));

  *total = cost;
  return true;
}
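
/* Illustrative arithmetic for the scaling above: a DImode integer
   operation with base cost COSTS_N_INSNS (2) is multiplied by
   (8/4) * (8/4) = 4, i.e. reported as COSTS_N_INSNS (8), reflecting
   that wide integer operations expand to several 32-bit insns.  */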
static enum machine_mode
spu_unwind_word_mode (void)
{
  return SImode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call. */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.) */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
	  && ((type) == 0
	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	      || int_size_in_bytes (type) >
	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */

enum spu_function_code
{
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"
#undef DEF_BUILTIN
  NUM_SPU_BUILTINS
};

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];

/* Returns the spu builtin decl for CODE.  */

static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= NUM_SPU_BUILTINS)
    return error_mark_node;

  return spu_builtin_decls[code];
}
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
			(void_type_node,
			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];		/* build_function will make a copy. */
      int parm;

      if (d->name == 0)
	continue;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
	;

      p = void_list_node;
      while (parm > 1)
	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      spu_builtin_decls[i] =
	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
	TREE_READONLY (spu_builtin_decls[i]) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (spu_builtin_decls[i]) = 1;
    }
}
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
	  && GET_CODE (ops[1]) != SUBREG)
	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
	{
	case V2DImode:
	case V2DFmode:
	  shuf =
	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
				TImode);
	  break;
	case V4SImode:
	case V4SFmode:
	  shuf =
	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
				TImode);
	  break;
	case V8HImode:
	  shuf =
	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
				TImode);
	  break;
	case V16QImode:
	  shuf =
	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
				TImode);
	  break;
	default:
	  abort ();
	}
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
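
/* Illustrative reading of the shuffle constants above: for V4SI the
   pattern 0x00010203 repeated in every word slot makes each result word
   select bytes 0-3 of ops[1] (the SImode preferred slot), splatting the
   scalar; the V16QI constant of 0x03 bytes likewise replicates byte 3,
   where a QImode scalar lives in a register.  */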
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
	  break;
	case V4SFmode:
	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
	  break;
	case V4SImode:
	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
	  break;
	case V2DImode:
	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
	  break;
	case V2DFmode:
	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
	  break;
	default:
	  abort ();
	}
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case V4SFmode:
	case V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case V2DImode:
	case V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
static void
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      static unsigned char const shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      static unsigned char const insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (mem, bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      static unsigned char const insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage. */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);
    }
  emit_insn (gen_sync ());
}
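
/* Illustrative summary (not from the original source): the trampoline
   written above is one 16-byte quadword of instruction words, built by
   merging the shifted function address and static chain into the insna
   template; for TARGET_LARGE_MEM a second quadword holds an indirect
   branch word, 0x35000000 + (79 << 7), i.e. "bi $79".  The final
   gen_sync () makes the freshly stored code visible to instruction
   fetch before it can be executed.  */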
static bool
spu_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !spu_naked_function_p (decl);
}

void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
/* expand vector initialization. If there are any constant parts,
   load constant parts first. Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	++n_var;
      else if (first_constant == NULL_RTX)
	first_constant = x;

      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* fill empty slots with the first constant, this increases
	     our chance of using splats in the recursive call below. */
	  for (i = 0; i < n_elts; ++i)
	    {
	      x = XVECEXP (constant_parts_rtx, 0, i);
	      if (!(CONST_INT_P (x)
		    || GET_CODE (x) == CONST_DOUBLE
		    || GET_CODE (x) == CONST_FIXED))
		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
	    }

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!(CONST_INT_P (x)
		|| GET_CODE (x) == CONST_DOUBLE
		|| GET_CODE (x) == CONST_FIXED))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
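
/* Usage sketch (illustrative): initializing { x, 1.0f, 2.0f, 3.0f }
   first loads the all-constant vector { 1.0f, 1.0f, 2.0f, 3.0f } (the
   variable slot is filled with the first constant, per the comment
   above) and then emits a single spu_builtin_insert to drop the
   variable x into element 0.  */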
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE. Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
		  enum machine_mode dest_mode,
		  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode. This is a recursive function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions uses destination V4SImode.
     Double floating point vector compare instructions uses destination V2DImode.
     Move destination to appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	case UNEQ:
	case UNLE:
	case UNLT:
	case UNGE:
	case UNGT:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum rtx_code rev_code;
	    enum insn_code nor_code;
	    rtx rev_mask;

	    rev_code = reverse_condition_maybe_unordered (rcode);
	    rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);

	    nor_code = optab_handler (one_cmpl_optab, dest_mode);
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE:  new_code = GT;  break;
	      case GEU: new_code = GTU; break;
	      case LE:  new_code = LT;  break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, dest_mode);
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	case LTGT:
	  /* Try LT OR GT */
	  {
	    rtx lt_rtx, gt_rtx;
	    enum insn_code ior_code;

	    lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
	    gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, dest_mode);
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	case ORDERED:
	  /* Implement as (A==A) & (B==B) */
	  {
	    rtx a_rtx, b_rtx;
	    enum insn_code and_code;

	    a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
	    b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);

	    and_code = optab_handler (and_optab, dest_mode);
	    gcc_assert (and_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	{
	  rtx tmp;
	  tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	}
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}
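
/* Worked example (illustrative): V4SI "a >= b" has no direct compare
   insn above, so it is decomposed into cgt (a > b) or'd with
   ceq (a == b); an LT compare is instead retried as GT with the two
   operands swapped.  */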
/* Emit vector conditional expression.
   DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d]",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range. */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa, and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned. */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr. */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored", lsbits,
		 d->name);
    }
}
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = (enum insn_code) d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl. */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a + 1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
	abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  gcc_assert (i == insn_data[icode].n_generator_args);
  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = (enum insn_code) d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar. */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  return spu_builtin_decls[SPU_MASK_FOR_LOAD];
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
    case vec_perm:
    case vec_promote_demote:
      return 1;

    case scalar_load:
    case scalar_store:
      return 2;

    /* Load + rotate. */
    case unaligned_load:
      return 2;

    case cond_branch_taken:
      return 6;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.init_cost.  */

static void *
spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;		/* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
spu_finish_cost (void *data, unsigned *prologue_cost,
		 unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
spu_destroy_cost_data (void *data)
{
  free (data);
}
/* Return true iff, data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how may iterations are required to reach desired alignment.  */

static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Return the appropriate mode for a named address pointer.  */
static enum machine_mode
spu_addr_space_pointer_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return ptr_mode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate mode for a named address address. */
static enum machine_mode
spu_addr_space_address_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return Pmode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}
/* Determine if one named address space is a subset of another.  */

static bool
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);

  if (subset == superset)
    return true;

  /* If we have -mno-address-space-conversion, treat __ea and generic as not
     being subsets but instead as disjoint address spaces.  */
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
    return false;

  else
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
}
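
/* Resulting relation (illustrative summary of the code above):

       subset_p (generic, generic) = true
       subset_p (__ea,    __ea)    = true
       subset_p (generic, __ea)    = true, unless
                                     -mno-address-space-conversion
       subset_p (__ea,    generic) = false

   i.e. the 32-bit local store is treated as a subset of the 64-bit
   effective address space.  */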
/* Convert from one address space to another.  */
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
				  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

      return result;
    }

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
				  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
	emit_insn (gen_addsi3 (result, op, ls));
      else
	emit_insn (gen_adddi3 (result, op, ls));

      return result;
    }

  else
    gcc_unreachable ();
}
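
/* Illustrative summary: for a pointer p in the __ea space, the generic
   value is computed above as p ? p - __ea_local_store : 0, and the
   reverse conversion adds __ea_local_store back in, again preserving
   null pointers via the conditional move.  */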
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
   -2 are instructions that can go in pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = { 0, 0, 0, 0 };

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 INSN_UID (insn),
		 insn_data[INSN_CODE (insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
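
/* Worked example (illustrative): with t = { 2, 0, 3, 1 } -- two
   either-pipe insns, three pipe0 insns, one pipe1 insn -- the MII is
   MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */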
void
spu_init_expanders (void)
{
  if (cfun)
    {
      rtx r0, r1;
      /* HARD_FRAME_REGISTER is only 128 bit aligned when
         frame_pointer_needed is true.  We don't know that until we're
         expanding the prologue. */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
	 to be treated as aligned, so generate them here. */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
/* For SPU word mode is TI mode so it is better to use SImode
   for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
/* For SPU word mode is TI mode so it is better to use SImode
   for shift counts.  */
  return SImode;
}
/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}
/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode. */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
	 __vector float m = (__vector float) spu_splats (exp); */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}
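
/* Worked example (illustrative): for scale = 1 the constant branch
   computes exp = 127 + 1 = 128, so each result word is 128 << 23 =
   0x40000000, the IEEE-754 single-precision encoding of 2.0f; in the
   byte array, arr[0] = exp >> 1 = 0x40 and arr[1] = (exp << 7) & 0xff
   = 0.  */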
/* After reload, just change the convert into a move instruction
   or a dead instruction. */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}

void
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
}
/* Implement targetm.ref_may_alias_errno.  */
static bool
spu_ref_may_alias_errno (ao_ref *ref)
{
  tree base = ao_ref_base (ref);

  /* With SPU newlib, errno is defined as something like
	 _impure_data._errno
     The default implementation of this target macro does not
     recognize such expressions, so special-code for it here.  */

  if (TREE_CODE (base) == VAR_DECL
      && !TREE_STATIC (base)
      && DECL_EXTERNAL (base)
      && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
      && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
		 "_impure_data") == 0
      /* _errno is the first member of _impure_data. */
      && ref->offset == 0)
    return true;

  return default_ref_may_alias_errno (ref);
}
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
	output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
	{
	  output_asm_insn ("il\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%3", op);
	  output_asm_insn ("iohl\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
	output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
	{
	  output_asm_insn ("il\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%5", op);
	  output_asm_insn ("iohl\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
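/* Worked example (assuming FIRST_ARG_REGNUM is register 3, so %1 is $3):
   a thunk with delta = 8 and vcall_offset = 0 reduces to just

     ai   $3,$3,8      # delta fits the signed 10-bit immediate of "ai"
     br   target

   whereas delta = 0x12345 needs the $79 temporary (%6) built with the
   ilhu/iohl pair before the add:

     ilhu $79,1        # high halfword of delta (0x12345 >> 16)
     iohl $79,0x2345   # or-in the low halfword
     a    $3,$3,$79
     br   target  */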
/* Canonicalize a comparison from one we don't have to one we do have.  */
static void
spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  if (!op0_preserve_value
      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
    {
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }
}
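/* Example (not in the original source): when op0 need not be preserved,
   (lt a b) is rewritten as (gt b a) -- the operands are exchanged and
   swap_condition maps LT to GT -- so only the greater-than style
   comparisons need machine patterns.  */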
/*  Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,             0, 0, false, false, false, NULL, false }
};
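/* Source-level usage of the attributes above (illustrative):

     void isr (void) __attribute__ ((naked));       // decl_req: declarations
     typedef int v4 __attribute__ ((spu_vector));   // type_req: types

   matching the decl_req/type_req flags in each row of the table.  */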
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION  spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
struct gcc_target targetm = TARGET_INITIALIZER;