/* Copyright (C) 2006-2015 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "insn-codes.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  /* A 16-byte type.  (Implemented with V16QI_type_node.)  */
  /* These all correspond to intSI_type_node.  */
  /* These correspond to the standard types.  */
#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
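/* Illustrative note (added, not in the original source): each entry gives
   the inclusive range of an immediate operand class used to validate
   builtin arguments.  For example, SPU_BTI_S10 covers -0x200..0x1ff, the
   signed 10-bit immediate field accepted by instructions such as ai and
   cgti.  */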
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);
/* Which instruction set architecture to use. */

/* Which cpu are we tuning for. */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
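/* Worked example (added, not in the original source): with the default of
   allowing up to 2 nops, spu_hint_dist = 8*4 - 2*4 = 24 bytes, so the
   compiler wants to see at least 24 bytes (6 insns) of real instructions
   between the hint and its branch, and pads the remaining distance, up to
   8 bytes, with nops.  */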
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)

/* Define the structure for the machine field in struct function. */
struct GTY(()) machine_function
  /* Register to use for PIC accesses. */

/* How to allocate a 'struct machine_function'. */
static struct machine_function *
spu_init_machine_status (void)
  return ggc_cleared_alloc<machine_function> ();
/* Implement TARGET_OPTION_OVERRIDE. */
spu_option_override (void)
  /* Set up function hooks. */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
  /* Determine processor architectural level. */
  if (strcmp (&spu_arch_string[0], "cell") == 0)
    spu_arch = PROCESSOR_CELL;
  else if (strcmp (&spu_arch_string[0], "celledp") == 0)
    spu_arch = PROCESSOR_CELLEDP;
  else
    error ("bad value (%s) for -march= switch", spu_arch_string);

  /* Determine processor to tune for. */
  if (strcmp (&spu_tune_string[0], "cell") == 0)
    spu_tune = PROCESSOR_CELL;
  else if (strcmp (&spu_tune_string[0], "celledp") == 0)
    spu_tune = PROCESSOR_CELLEDP;
  else
    error ("bad value (%s) for -mtune= switch", spu_tune_string);
  /* Change defaults according to the processor architecture. */
  if (spu_arch == PROCESSOR_CELLEDP)
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround. */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
spu_scalar_mode_supported_p (machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
spu_vector_mode_supported_p (machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct. */
valid_subreg (rtx op)
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
adjust_operand (rtx op, HOST_WIDE_INT * start)
  /* Strip any paradoxical SUBREG. */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);

  /* If it is smaller than SI, assure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
      *start += 32 - op_size;

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
404 spu_expand_extv (rtx ops
[], int unsignedp
)
406 rtx dst
= ops
[0], src
= ops
[1];
407 HOST_WIDE_INT width
= INTVAL (ops
[2]);
408 HOST_WIDE_INT start
= INTVAL (ops
[3]);
409 HOST_WIDE_INT align_mask
;
410 rtx s0
, s1
, mask
, r0
;
412 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
416 /* First, determine if we need 1 TImode load or 2. We need only 1
417 if the bits being extracted do not cross the alignment boundary
418 as determined by the MEM and its address. */
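  /* Example (added for clarity, not in the original): MEM_ALIGN is in
     bits, so for a 16-byte aligned MEM align_mask is -128.  Extracting
     width=32 bits at start=8 gives (8 & -128) == (39 & -128) == 0, so one
     load suffices; at start=104 the two values are 0 and 128, the field
     straddles a quadword boundary, and two loads are needed.  */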
420 align_mask
= -MEM_ALIGN (src
);
421 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
423 /* Alignment is sufficient for 1 load. */
424 s0
= gen_reg_rtx (TImode
);
425 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
428 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
433 s0
= gen_reg_rtx (TImode
);
434 s1
= gen_reg_rtx (TImode
);
435 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
438 gcc_assert (start
+ width
<= 128);
441 rtx r1
= gen_reg_rtx (SImode
);
442 mask
= gen_reg_rtx (TImode
);
443 emit_move_insn (mask
, GEN_INT (-1));
444 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
445 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
446 if (GET_CODE (r0
) == CONST_INT
)
447 r1
= GEN_INT (INTVAL (r0
) & 15);
449 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
450 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
451 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
456 else if (GET_CODE (src
) == SUBREG
)
458 rtx r
= SUBREG_REG (src
);
459 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
460 s0
= gen_reg_rtx (TImode
);
461 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
462 emit_insn (gen_rtx_SET (VOIDmode
, s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
464 emit_move_insn (s0
, src
);
468 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
469 s0
= gen_reg_rtx (TImode
);
470 emit_move_insn (s0
, src
);
473 /* Now s0 is TImode and contains the bits to extract at start. */
476 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
479 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, 128 - width
, s0
, unsignedp
);
481 emit_move_insn (dst
, s0
);
485 spu_expand_insv (rtx ops
[])
487 HOST_WIDE_INT width
= INTVAL (ops
[1]);
488 HOST_WIDE_INT start
= INTVAL (ops
[2]);
489 HOST_WIDE_INT maskbits
;
490 machine_mode dst_mode
;
491 rtx dst
= ops
[0], src
= ops
[3];
498 if (GET_CODE (ops
[0]) == MEM
)
499 dst
= gen_reg_rtx (TImode
);
501 dst
= adjust_operand (dst
, &start
);
502 dst_mode
= GET_MODE (dst
);
503 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
505 if (CONSTANT_P (src
))
508 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
509 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
511 src
= adjust_operand (src
, 0);
513 mask
= gen_reg_rtx (dst_mode
);
514 shift_reg
= gen_reg_rtx (dst_mode
);
515 shift
= dst_size
- start
- width
;
517 /* It's not safe to use subreg here because the compiler assumes
518 that the SUBREG_REG is right justified in the SUBREG. */
519 convert_move (shift_reg
, src
, 1);
526 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
529 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
532 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
544 maskbits
= (-1ll << (32 - width
- start
));
546 maskbits
+= (1ll << (32 - start
));
547 emit_move_insn (mask
, GEN_INT (maskbits
));
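  /* Worked example for the SImode case above (added, not in the original):
     inserting width=8 bits at start=4 gives maskbits = -1ll << 20; since
     start is nonzero we add 1ll << 28, leaving 0x0ff00000 -- ones exactly
     in the 8 bit positions starting 4 bits below the MSB.  */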
550 maskbits
= (-1ll << (64 - width
- start
));
552 maskbits
+= (1ll << (64 - start
));
553 emit_move_insn (mask
, GEN_INT (maskbits
));
557 unsigned char arr
[16];
559 memset (arr
, 0, sizeof (arr
));
560 arr
[i
] = 0xff >> (start
& 7);
561 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
563 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
564 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
570 if (GET_CODE (ops
[0]) == MEM
)
572 rtx low
= gen_reg_rtx (SImode
);
573 rtx rotl
= gen_reg_rtx (SImode
);
574 rtx mask0
= gen_reg_rtx (TImode
);
580 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
581 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
582 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
583 emit_insn (gen_negsi2 (rotl
, low
));
584 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
585 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
586 mem
= change_address (ops
[0], TImode
, addr0
);
587 set_mem_alias_set (mem
, 0);
588 emit_move_insn (dst
, mem
);
589 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
590 if (start
+ width
> MEM_ALIGN (ops
[0]))
592 rtx shl
= gen_reg_rtx (SImode
);
593 rtx mask1
= gen_reg_rtx (TImode
);
594 rtx dst1
= gen_reg_rtx (TImode
);
596 addr1
= plus_constant (Pmode
, addr
, 16);
597 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
598 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
599 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
600 mem1
= change_address (ops
[0], TImode
, addr1
);
601 set_mem_alias_set (mem1
, 0);
602 emit_move_insn (dst1
, mem1
);
603 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
604 emit_move_insn (mem1
, dst1
);
606 emit_move_insn (mem
, dst
);
609 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
614 spu_expand_block_move (rtx ops
[])
616 HOST_WIDE_INT bytes
, align
, offset
;
617 rtx src
, dst
, sreg
, dreg
, target
;
619 if (GET_CODE (ops
[2]) != CONST_INT
620 || GET_CODE (ops
[3]) != CONST_INT
621 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
624 bytes
= INTVAL (ops
[2]);
625 align
= INTVAL (ops
[3]);
635 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
637 dst
= adjust_address (ops
[0], V16QImode
, offset
);
638 src
= adjust_address (ops
[1], V16QImode
, offset
);
639 emit_move_insn (dst
, src
);
644 unsigned char arr
[16] = { 0 };
645 for (i
= 0; i
< bytes
- offset
; i
++)
647 dst
= adjust_address (ops
[0], V16QImode
, offset
);
648 src
= adjust_address (ops
[1], V16QImode
, offset
);
649 mask
= gen_reg_rtx (V16QImode
);
650 sreg
= gen_reg_rtx (V16QImode
);
651 dreg
= gen_reg_rtx (V16QImode
);
652 target
= gen_reg_rtx (V16QImode
);
653 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
654 emit_move_insn (dreg
, dst
);
655 emit_move_insn (sreg
, src
);
656 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
657 emit_move_insn (dst
, target
);
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves. */
688 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
690 int reverse_compare
= 0;
691 int reverse_test
= 0;
692 rtx compare_result
, eq_result
;
693 rtx comp_rtx
, eq_rtx
;
694 machine_mode comp_mode
;
695 machine_mode op_mode
;
696 enum spu_comp_code scode
, eq_code
;
697 enum insn_code ior_code
;
698 enum rtx_code code
= GET_CODE (cmp
);
699 rtx op0
= XEXP (cmp
, 0);
700 rtx op1
= XEXP (cmp
, 1);
704 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
705 and so on, to keep the constant in operand 1. */
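  /* Example (added for clarity, not in the original): (x >= 5) is
     rewritten as (x > 4) and (x < 5) as (x <= 4), so the constant stays
     in operand 1 where the SPU compare patterns (ceq, cgt, clgt) expect
     it.  */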
706 if (GET_CODE (op1
) == CONST_INT
)
708 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
709 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
733 /* However, if we generate an integer result, performing a reverse test
734 would require an extra negation, so avoid that where possible. */
735 if (GET_CODE (op1
) == CONST_INT
&& is_set
== 1)
737 HOST_WIDE_INT val
= INTVAL (op1
) + 1;
738 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
755 op_mode
= GET_MODE (op0
);
761 if (HONOR_NANS (op_mode
))
776 if (HONOR_NANS (op_mode
))
868 comp_mode
= V4SImode
;
872 comp_mode
= V2DImode
;
879 if (GET_MODE (op1
) == DFmode
880 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
883 if (is_set
== 0 && op1
== const0_rtx
884 && (GET_MODE (op0
) == SImode
885 || GET_MODE (op0
) == HImode
886 || GET_MODE (op0
) == QImode
) && scode
== SPU_EQ
)
888 /* Don't need to set a register with the result when we are
889 comparing against zero and branching. */
890 reverse_test
= !reverse_test
;
891 compare_result
= op0
;
895 compare_result
= gen_reg_rtx (comp_mode
);
904 if (spu_comp_icode
[index
][scode
] == 0)
907 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
909 op0
= force_reg (op_mode
, op0
);
910 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
912 op1
= force_reg (op_mode
, op1
);
913 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
917 emit_insn (comp_rtx
);
921 eq_result
= gen_reg_rtx (comp_mode
);
922 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
927 ior_code
= optab_handler (ior_optab
, comp_mode
);
928 gcc_assert (ior_code
!= CODE_FOR_nothing
);
929 emit_insn (GEN_FCN (ior_code
)
930 (compare_result
, compare_result
, eq_result
));
939 /* We don't have branch on QI compare insns, so we convert the
940 QI compare result to a HI result. */
941 if (comp_mode
== QImode
)
943 rtx old_res
= compare_result
;
944 compare_result
= gen_reg_rtx (HImode
);
946 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
950 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
952 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
954 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
955 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
956 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
959 else if (is_set
== 2)
961 rtx target
= operands
[0];
962 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
963 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
964 machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
966 rtx op_t
= operands
[2];
967 rtx op_f
= operands
[3];
969 /* The result of the comparison can be SI, HI or QI mode. Create a
970 mask based on that result. */
971 if (target_size
> compare_size
)
973 select_mask
= gen_reg_rtx (mode
);
974 emit_insn (gen_extend_compare (select_mask
, compare_result
));
976 else if (target_size
< compare_size
)
978 gen_rtx_SUBREG (mode
, compare_result
,
979 (compare_size
- target_size
) / BITS_PER_UNIT
);
980 else if (comp_mode
!= mode
)
981 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
983 select_mask
= compare_result
;
985 if (GET_MODE (target
) != GET_MODE (op_t
)
986 || GET_MODE (target
) != GET_MODE (op_f
))
990 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
992 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
996 rtx target
= operands
[0];
998 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
999 gen_rtx_NOT (comp_mode
, compare_result
)));
1000 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1001 emit_insn (gen_extendhisi2 (target
, compare_result
));
1002 else if (GET_MODE (target
) == SImode
1003 && GET_MODE (compare_result
) == QImode
)
1004 emit_insn (gen_extend_compare (target
, compare_result
));
1006 emit_move_insn (target
, compare_result
);
1011 const_double_to_hwint (rtx x
)
1015 if (GET_MODE (x
) == SFmode
)
1017 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1018 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1020 else if (GET_MODE (x
) == DFmode
)
1023 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1024 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1026 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1034 hwint_to_const_double (machine_mode mode
, HOST_WIDE_INT v
)
1038 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1041 tv
[0] = (v
<< 32) >> 32;
1042 else if (mode
== DFmode
)
1044 tv
[1] = (v
<< 32) >> 32;
1047 real_from_target (&rv
, tv
, mode
);
1048 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1052 print_operand_address (FILE * file
, register rtx addr
)
1057 if (GET_CODE (addr
) == AND
1058 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1059 && INTVAL (XEXP (addr
, 1)) == -16)
1060 addr
= XEXP (addr
, 0);
1062 switch (GET_CODE (addr
))
1065 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1069 reg
= XEXP (addr
, 0);
1070 offset
= XEXP (addr
, 1);
1071 if (GET_CODE (offset
) == REG
)
1073 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1074 reg_names
[REGNO (offset
)]);
1076 else if (GET_CODE (offset
) == CONST_INT
)
1078 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1079 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1089 output_addr_const (file
, addr
);
1099 print_operand (FILE * file
, rtx x
, int code
)
1101 machine_mode mode
= GET_MODE (x
);
1103 unsigned char arr
[16];
1104 int xcode
= GET_CODE (x
);
1106 if (GET_MODE (x
) == VOIDmode
)
1109 case 'L': /* 128 bits, signed */
1110 case 'm': /* 128 bits, signed */
1111 case 'T': /* 128 bits, signed */
1112 case 't': /* 128 bits, signed */
1115 case 'K': /* 64 bits, signed */
1116 case 'k': /* 64 bits, signed */
1117 case 'D': /* 64 bits, signed */
1118 case 'd': /* 64 bits, signed */
1121 case 'J': /* 32 bits, signed */
1122 case 'j': /* 32 bits, signed */
1123 case 's': /* 32 bits, signed */
1124 case 'S': /* 32 bits, signed */
1131 case 'j': /* 32 bits, signed */
1132 case 'k': /* 64 bits, signed */
1133 case 'm': /* 128 bits, signed */
1134 if (xcode
== CONST_INT
1135 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1137 gcc_assert (logical_immediate_p (x
, mode
));
1138 constant_to_array (mode
, x
, arr
);
1139 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1140 val
= trunc_int_for_mode (val
, SImode
);
1141 switch (which_logical_immediate (val
))
1146 fprintf (file
, "h");
1149 fprintf (file
, "b");
1159 case 'J': /* 32 bits, signed */
1160 case 'K': /* 64 bits, signed */
1161 case 'L': /* 128 bits, signed */
1162 if (xcode
== CONST_INT
1163 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1165 gcc_assert (logical_immediate_p (x
, mode
)
1166 || iohl_immediate_p (x
, mode
));
1167 constant_to_array (mode
, x
, arr
);
1168 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1169 val
= trunc_int_for_mode (val
, SImode
);
1170 switch (which_logical_immediate (val
))
1176 val
= trunc_int_for_mode (val
, HImode
);
1179 val
= trunc_int_for_mode (val
, QImode
);
1184 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1190 case 't': /* 128 bits, signed */
1191 case 'd': /* 64 bits, signed */
1192 case 's': /* 32 bits, signed */
1195 enum immediate_class c
= classify_immediate (x
, mode
);
1199 constant_to_array (mode
, x
, arr
);
1200 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1201 val
= trunc_int_for_mode (val
, SImode
);
1202 switch (which_immediate_load (val
))
1207 fprintf (file
, "a");
1210 fprintf (file
, "h");
1213 fprintf (file
, "hu");
1220 constant_to_array (mode
, x
, arr
);
1221 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1223 fprintf (file
, "b");
1225 fprintf (file
, "h");
1227 fprintf (file
, "w");
1229 fprintf (file
, "d");
1232 if (xcode
== CONST_VECTOR
)
1234 x
= CONST_VECTOR_ELT (x
, 0);
1235 xcode
= GET_CODE (x
);
1237 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1238 fprintf (file
, "a");
1239 else if (xcode
== HIGH
)
1240 fprintf (file
, "hu");
1254 case 'T': /* 128 bits, signed */
1255 case 'D': /* 64 bits, signed */
1256 case 'S': /* 32 bits, signed */
1259 enum immediate_class c
= classify_immediate (x
, mode
);
1263 constant_to_array (mode
, x
, arr
);
1264 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1265 val
= trunc_int_for_mode (val
, SImode
);
1266 switch (which_immediate_load (val
))
1273 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1278 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1281 constant_to_array (mode
, x
, arr
);
1283 for (i
= 0; i
< 16; i
++)
1288 print_operand (file
, GEN_INT (val
), 0);
1291 constant_to_array (mode
, x
, arr
);
1292 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1293 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1298 if (GET_CODE (x
) == CONST_VECTOR
)
1299 x
= CONST_VECTOR_ELT (x
, 0);
1300 output_addr_const (file
, x
);
1302 fprintf (file
, "@h");
1316 if (xcode
== CONST_INT
)
	/* Only the 4 least significant bits are relevant for generating
	   control word instructions. */
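	/* Example (added, not in the original): an operand value of 35
	   prints as 3 (35 & 15), since the c*d generate-controls
	   instructions only look at the low 4 bits of the offset.  */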
1320 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1325 case 'M': /* print code for c*d */
1326 if (GET_CODE (x
) == CONST_INT
)
1330 fprintf (file
, "b");
1333 fprintf (file
, "h");
1336 fprintf (file
, "w");
1339 fprintf (file
, "d");
1348 case 'N': /* Negate the operand */
1349 if (xcode
== CONST_INT
)
1350 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1351 else if (xcode
== CONST_VECTOR
)
1352 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1353 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1356 case 'I': /* enable/disable interrupts */
1357 if (xcode
== CONST_INT
)
1358 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1361 case 'b': /* branch modifiers */
1363 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1364 else if (COMPARISON_P (x
))
1365 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1368 case 'i': /* indirect call */
1371 if (GET_CODE (XEXP (x
, 0)) == REG
)
1372 /* Used in indirect function calls. */
1373 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1375 output_address (XEXP (x
, 0));
1379 case 'p': /* load/store */
1383 xcode
= GET_CODE (x
);
1388 xcode
= GET_CODE (x
);
1391 fprintf (file
, "d");
1392 else if (xcode
== CONST_INT
)
1393 fprintf (file
, "a");
1394 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1395 fprintf (file
, "r");
1396 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1398 if (GET_CODE (XEXP (x
, 1)) == REG
)
1399 fprintf (file
, "x");
1401 fprintf (file
, "d");
1406 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1408 output_addr_const (file
, GEN_INT (val
));
1412 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1414 output_addr_const (file
, GEN_INT (val
));
1418 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1420 output_addr_const (file
, GEN_INT (val
));
1424 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1425 val
= (val
>> 3) & 0x1f;
1426 output_addr_const (file
, GEN_INT (val
));
1430 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1433 output_addr_const (file
, GEN_INT (val
));
1437 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1440 output_addr_const (file
, GEN_INT (val
));
1444 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1447 output_addr_const (file
, GEN_INT (val
));
1451 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1452 val
= -(val
& -8ll);
1453 val
= (val
>> 3) & 0x1f;
1454 output_addr_const (file
, GEN_INT (val
));
1459 constant_to_array (mode
, x
, arr
);
1460 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1461 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1466 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1467 else if (xcode
== MEM
)
1468 output_address (XEXP (x
, 0));
1469 else if (xcode
== CONST_VECTOR
)
1470 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1472 output_addr_const (file
, x
);
1479 output_operand_lossage ("invalid %%xn code");
1484 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1485 caller saved register. For leaf functions it is more efficient to
1486 use a volatile register because we won't need to save and restore the
1487 pic register. This routine is only valid after register allocation
1488 is completed, so we can pick an unused register. */
1492 if (!reload_completed
&& !reload_in_progress
)
1495 /* If we've already made the decision, we need to keep with it. Once we've
1496 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1497 return true since the register is now live; this should not cause us to
1498 "switch back" to using pic_offset_table_rtx. */
1499 if (!cfun
->machine
->pic_reg
)
1501 if (crtl
->is_leaf
&& !df_regs_ever_live_p (LAST_ARG_REGNUM
))
1502 cfun
->machine
->pic_reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
1504 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1507 return cfun
->machine
->pic_reg
;
1510 /* Split constant addresses to handle cases that are too large.
1511 Add in the pic register when in PIC mode.
1512 Split immediates that require more than 1 instruction. */
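/* Example (added for clarity, not in the original source): a constant such
   as 0x12345678 cannot be loaded by a single il/ilhu/fsmbi, so the IC_IL2
   case below emits "ilhu rt,0x1234" followed by "iohl rt,0x5678".  */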
1514 spu_split_immediate (rtx
* ops
)
1516 machine_mode mode
= GET_MODE (ops
[0]);
1517 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1523 unsigned char arrhi
[16];
1524 unsigned char arrlo
[16];
1525 rtx to
, temp
, hi
, lo
;
1527 machine_mode imode
= mode
;
1528 /* We need to do reals as ints because the constant used in the
1529 IOR might not be a legitimate real constant. */
1530 imode
= int_mode_for_mode (mode
);
1531 constant_to_array (mode
, ops
[1], arrhi
);
1533 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1536 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1537 for (i
= 0; i
< 16; i
+= 4)
1539 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1540 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1541 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1542 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1544 hi
= array_to_constant (imode
, arrhi
);
1545 lo
= array_to_constant (imode
, arrlo
);
1546 emit_move_insn (temp
, hi
);
1547 emit_insn (gen_rtx_SET
1548 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1553 unsigned char arr_fsmbi
[16];
1554 unsigned char arr_andbi
[16];
1555 rtx to
, reg_fsmbi
, reg_and
;
1557 machine_mode imode
= mode
;
1558 /* We need to do reals as ints because the constant used in the
1559 * AND might not be a legitimate real constant. */
1560 imode
= int_mode_for_mode (mode
);
1561 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1563 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1566 for (i
= 0; i
< 16; i
++)
1567 if (arr_fsmbi
[i
] != 0)
1569 arr_andbi
[0] = arr_fsmbi
[i
];
1570 arr_fsmbi
[i
] = 0xff;
1572 for (i
= 1; i
< 16; i
++)
1573 arr_andbi
[i
] = arr_andbi
[0];
1574 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1575 reg_and
= array_to_constant (imode
, arr_andbi
);
1576 emit_move_insn (to
, reg_fsmbi
);
1577 emit_insn (gen_rtx_SET
1578 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1582 if (reload_in_progress
|| reload_completed
)
1584 rtx mem
= force_const_mem (mode
, ops
[1]);
1585 if (TARGET_LARGE_MEM
)
1587 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1588 emit_move_insn (addr
, XEXP (mem
, 0));
1589 mem
= replace_equiv_address (mem
, addr
);
1591 emit_move_insn (ops
[0], mem
);
1597 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1601 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1602 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1605 emit_insn (gen_pic (ops
[0], ops
[1]));
1608 rtx pic_reg
= get_pic_reg ();
1609 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1611 return flag_pic
|| c
== IC_IL2s
;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
1629 need_to_save_reg (int regno
, int saving
)
1631 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1634 && regno
== PIC_OFFSET_TABLE_REGNUM
1635 && (!saving
|| cfun
->machine
->pic_reg
== pic_offset_table_rtx
))
/* This function is only correct starting with local register
   allocation. */
1643 spu_saved_regs_size (void)
1645 int reg_save_size
= 0;
1648 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1649 if (need_to_save_reg (regno
, 0))
1650 reg_save_size
+= 0x10;
1651 return reg_save_size
;
1655 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1657 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1659 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1660 return emit_insn (gen_movv4si (mem
, reg
));
1664 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1666 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1668 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1669 return emit_insn (gen_movv4si (reg
, mem
));
1672 /* This happens after reload, so we need to expand it. */
1674 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1677 if (satisfies_constraint_K (GEN_INT (imm
)))
1679 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1683 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1684 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1685 if (REGNO (src
) == REGNO (scratch
))
1691 /* Return nonzero if this function is known to have a null epilogue. */
1694 direct_return (void)
1696 if (reload_completed
)
1698 if (cfun
->static_chain_decl
== 0
1699 && (spu_saved_regs_size ()
1701 + crtl
->outgoing_args_size
1702 + crtl
->args
.pretend_args_size
== 0)
   The stack frame looks like this:

   AP  -> +-------------+
 prev SP | back chain   |
         | reg save     |  crtl->args.pretend_args_size bytes
         | saved regs   |  spu_saved_regs_size() bytes
   FP  -> +-------------+
         | vars         |  get_frame_size() bytes
  HFP  -> +-------------+
         | args         |  crtl->outgoing_args_size bytes
   SP  -> +-------------+
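/* Worked example (added, not in the original): for a function with 32
   bytes of locals, two 16-byte register saves and 48 bytes of outgoing
   args, total_size = 32 + 2*16 + 48 + crtl->args.pretend_args_size, plus
   STACK_POINTER_OFFSET whenever a frame is actually needed, as computed
   in spu_expand_prologue below.  */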
1740 spu_expand_prologue (void)
1742 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1743 HOST_WIDE_INT total_size
;
1744 HOST_WIDE_INT saved_regs_size
;
1745 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1746 rtx scratch_reg_0
, scratch_reg_1
;
1750 if (flag_pic
&& optimize
== 0 && !cfun
->machine
->pic_reg
)
1751 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1753 if (spu_naked_function_p (current_function_decl
))
1756 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1757 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1759 saved_regs_size
= spu_saved_regs_size ();
1760 total_size
= size
+ saved_regs_size
1761 + crtl
->outgoing_args_size
1762 + crtl
->args
.pretend_args_size
;
1765 || cfun
->calls_alloca
|| total_size
> 0)
1766 total_size
+= STACK_POINTER_OFFSET
;
1768 /* Save this first because code after this might use the link
1769 register as a scratch register. */
1772 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1773 RTX_FRAME_RELATED_P (insn
) = 1;
1778 offset
= -crtl
->args
.pretend_args_size
;
1779 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1780 if (need_to_save_reg (regno
, 1))
1783 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1784 RTX_FRAME_RELATED_P (insn
) = 1;
1788 if (flag_pic
&& cfun
->machine
->pic_reg
)
1790 rtx pic_reg
= cfun
->machine
->pic_reg
;
1791 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1792 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1797 if (flag_stack_check
)
1799 /* We compare against total_size-1 because
1800 ($sp >= total_size) <=> ($sp > total_size-1) */
1801 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1802 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1803 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1804 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1806 emit_move_insn (scratch_v4si
, size_v4si
);
1807 size_v4si
= scratch_v4si
;
1809 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1810 emit_insn (gen_vec_extractv4si
1811 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1812 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
  /* Adjust the stack pointer, and make sure scratch_reg_0 contains
     the value of the previous $sp because we save it as the back
     chain. */
1818 if (total_size
<= 2000)
1820 /* In this case we save the back chain first. */
1821 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1823 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1827 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1829 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1831 RTX_FRAME_RELATED_P (insn
) = 1;
1832 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1833 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1835 if (total_size
> 2000)
1837 /* Save the back chain ptr */
1838 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1841 if (frame_pointer_needed
)
1843 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1844 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1845 + crtl
->outgoing_args_size
;
1846 /* Set the new frame_pointer */
1847 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1848 RTX_FRAME_RELATED_P (insn
) = 1;
1849 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1850 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1851 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1855 if (flag_stack_usage_info
)
1856 current_function_static_stack_size
= total_size
;
1860 spu_expand_epilogue (bool sibcall_p
)
1862 int size
= get_frame_size (), offset
, regno
;
1863 HOST_WIDE_INT saved_regs_size
, total_size
;
1864 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1867 if (spu_naked_function_p (current_function_decl
))
1870 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1872 saved_regs_size
= spu_saved_regs_size ();
1873 total_size
= size
+ saved_regs_size
1874 + crtl
->outgoing_args_size
1875 + crtl
->args
.pretend_args_size
;
1878 || cfun
->calls_alloca
|| total_size
> 0)
1879 total_size
+= STACK_POINTER_OFFSET
;
1883 if (cfun
->calls_alloca
)
1884 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1886 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1889 if (saved_regs_size
> 0)
1891 offset
= -crtl
->args
.pretend_args_size
;
1892 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1893 if (need_to_save_reg (regno
, 1))
1896 frame_emit_load (regno
, sp_reg
, offset
);
1902 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1906 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1907 emit_jump_insn (gen__return ());
1912 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1916 /* This is inefficient because it ends up copying to a save-register
1917 which then gets saved even though $lr has already been saved. But
1918 it does generate better code for leaf functions and we don't need
1919 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1920 used for __builtin_return_address anyway, so maybe we don't care if
1921 it's inefficient. */
1922 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1926 /* Given VAL, generate a constant appropriate for MODE.
1927 If MODE is a vector mode, every element will be VAL.
1928 For TImode, VAL will be zero extended to 128 bits. */
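/* Usage example (added, not in the original): spu_const (V4SImode, 1)
   builds a CONST_VECTOR whose four SImode elements are all 1, while
   spu_const (SFmode, 0x3f800000) treats VAL as the bit pattern of 1.0f.  */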
1930 spu_const (machine_mode mode
, HOST_WIDE_INT val
)
1936 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1937 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1938 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1939 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1941 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1942 return immed_double_const (val
, 0, mode
);
1944 /* val is the bit representation of the float */
1945 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1946 return hwint_to_const_double (mode
, val
);
1948 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1949 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1951 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1953 units
= GET_MODE_NUNITS (mode
);
1955 v
= rtvec_alloc (units
);
1957 for (i
= 0; i
< units
; ++i
)
1958 RTVEC_ELT (v
, i
) = inner
;
1960 return gen_rtx_CONST_VECTOR (mode
, v
);
/* Create a MODE vector constant from 4 ints. */
spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant(mode, arr);
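/* Usage example (added, not in the original): spu_const_from_ints
   (V16QImode, 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f) yields the
   identity pattern for shufb, i.e. bytes 0..15 in order.  */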
1987 /* branch hint stuff */
1989 /* An array of these is used to propagate hints to predecessor blocks. */
1992 rtx_insn
*prop_jump
; /* propagated from another block */
1993 int bb_index
; /* the original block. */
1995 static struct spu_bb_info
*spu_bb_info
;
1997 #define STOP_HINT_P(INSN) \
1999 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2000 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2002 /* 1 when RTX is a hinted branch or its target. We keep track of
2003 what has been hinted so the safe-hint code can test it easily. */
2004 #define HINTED_P(RTX) \
2005 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2007 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2008 #define SCHED_ON_EVEN_P(RTX) \
2009 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2011 /* Emit a nop for INSN such that the two will dual issue. This assumes
2012 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2013 We check for TImode to handle a MULTI1 insn which has dual issued its
2014 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2016 emit_nop_for_insn (rtx_insn
*insn
)
2021 /* We need to handle JUMP_TABLE_DATA separately. */
2022 if (JUMP_TABLE_DATA_P (insn
))
2024 new_insn
= emit_insn_after (gen_lnop(), insn
);
2025 recog_memoized (new_insn
);
2026 INSN_LOCATION (new_insn
) = UNKNOWN_LOCATION
;
2030 p
= get_pipe (insn
);
2031 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2032 new_insn
= emit_insn_after (gen_lnop (), insn
);
2033 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2035 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2036 PUT_MODE (new_insn
, TImode
);
2037 PUT_MODE (insn
, VOIDmode
);
2040 new_insn
= emit_insn_after (gen_lnop (), insn
);
2041 recog_memoized (new_insn
);
2042 INSN_LOCATION (new_insn
) = INSN_LOCATION (insn
);
2045 /* Insert nops in basic blocks to meet dual issue alignment
2046 requirements. Also make sure hbrp and hint instructions are at least
2047 one cycle apart, possibly inserting a nop. */
2051 rtx_insn
*insn
, *next_insn
, *prev_insn
, *hbr_insn
= 0;
2055 /* This sets up INSN_ADDRESSES. */
2056 shorten_branches (get_insns ());
2058 /* Keep track of length added by nops. */
2062 insn
= get_insns ();
2063 if (!active_insn_p (insn
))
2064 insn
= next_active_insn (insn
);
2065 for (; insn
; insn
= next_insn
)
2067 next_insn
= next_active_insn (insn
);
2068 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2069 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2073 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2074 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2075 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2078 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2079 PUT_MODE (prev_insn
, GET_MODE (insn
));
2080 PUT_MODE (insn
, TImode
);
2081 INSN_LOCATION (prev_insn
) = INSN_LOCATION (insn
);
2087 if (INSN_CODE (insn
) == CODE_FOR_blockage
&& next_insn
)
2089 if (GET_MODE (insn
) == TImode
)
2090 PUT_MODE (next_insn
, TImode
);
2092 next_insn
= next_active_insn (insn
);
2094 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2095 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2097 if (((addr
+ length
) & 7) != 0)
2099 emit_nop_for_insn (prev_insn
);
2103 else if (GET_MODE (insn
) == TImode
2104 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2105 || get_attr_type (insn
) == TYPE_MULTI0
)
2106 && ((addr
+ length
) & 7) != 0)
2108 /* prev_insn will always be set because the first insn is
2109 always 8-byte aligned. */
2110 emit_nop_for_insn (prev_insn
);
2118 /* Routines for branch hints. */
2121 spu_emit_branch_hint (rtx_insn
*before
, rtx_insn
*branch
, rtx target
,
2122 int distance
, sbitmap blocks
)
2124 rtx branch_label
= 0;
2127 rtx_jump_table_data
*table
;
2129 if (before
== 0 || branch
== 0 || target
== 0)
2132 /* While scheduling we require hints to be no further than 600, so
2133 we need to enforce that here too */
2137 /* If we have a Basic block note, emit it after the basic block note. */
2138 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2139 before
= NEXT_INSN (before
);
2141 branch_label
= gen_label_rtx ();
2142 LABEL_NUSES (branch_label
)++;
2143 LABEL_PRESERVE_P (branch_label
) = 1;
2144 insn
= emit_label_before (branch_label
, branch
);
2145 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2146 bitmap_set_bit (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2148 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2149 recog_memoized (hint
);
2150 INSN_LOCATION (hint
) = INSN_LOCATION (branch
);
2151 HINTED_P (branch
) = 1;
2153 if (GET_CODE (target
) == LABEL_REF
)
2154 HINTED_P (XEXP (target
, 0)) = 1;
2155 else if (tablejump_p (branch
, 0, &table
))
2159 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2160 vec
= XVEC (PATTERN (table
), 0);
2162 vec
= XVEC (PATTERN (table
), 1);
2163 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2164 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2167 if (distance
>= 588)
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit. */
2171 insn
= emit_insn_before (gen_blockage (), hint
);
2172 recog_memoized (insn
);
2173 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2175 else if (distance
<= 8 * 4)
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops. */
2180 for (d
= distance
; d
< 8 * 4; d
+= 4)
2183 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2184 recog_memoized (insn
);
2185 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2188 /* Make sure any nops inserted aren't scheduled before the hint. */
2189 insn
= emit_insn_after (gen_blockage (), hint
);
2190 recog_memoized (insn
);
2191 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2193 /* Make sure any nops inserted aren't scheduled after the call. */
2194 if (CALL_P (branch
) && distance
< 8 * 4)
2196 insn
= emit_insn_before (gen_blockage (), branch
);
2197 recog_memoized (insn
);
2198 INSN_LOCATION (insn
) = INSN_LOCATION (branch
);
2203 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2204 the rtx for the branch target. */
2206 get_branch_target (rtx_insn
*branch
)
2208 if (JUMP_P (branch
))
2212 /* Return statements */
2213 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2214 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2217 if (extract_asm_operands (PATTERN (branch
)) != NULL
)
2220 set
= single_set (branch
);
2221 src
= SET_SRC (set
);
2222 if (GET_CODE (SET_DEST (set
)) != PC
)
2225 if (GET_CODE (src
) == IF_THEN_ELSE
)
2228 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2231 /* If the more probable case is not a fall through, then
2232 try a branch hint. */
2233 int prob
= XINT (note
, 0);
2234 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2235 && GET_CODE (XEXP (src
, 1)) != PC
)
2236 lab
= XEXP (src
, 1);
2237 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2238 && GET_CODE (XEXP (src
, 2)) != PC
)
2239 lab
= XEXP (src
, 2);
2243 if (GET_CODE (lab
) == RETURN
)
2244 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2252 else if (CALL_P (branch
))
2255 /* All of our call patterns are in a PARALLEL and the CALL is
2256 the first pattern in the PARALLEL. */
2257 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2259 call
= XVECEXP (PATTERN (branch
), 0, 0);
2260 if (GET_CODE (call
) == SET
)
2261 call
= SET_SRC (call
);
2262 if (GET_CODE (call
) != CALL
)
2264 return XEXP (XEXP (call
, 0), 0);
2269 /* The special $hbr register is used to prevent the insn scheduler from
2270 moving hbr insns across instructions which invalidate them. It
2271 should only be used in a clobber, and this function searches for
2272 insns which clobber it. */
2274 insn_clobbers_hbr (rtx_insn
*insn
)
2277 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2279 rtx parallel
= PATTERN (insn
);
2282 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2284 clobber
= XVECEXP (parallel
, 0, j
);
2285 if (GET_CODE (clobber
) == CLOBBER
2286 && GET_CODE (XEXP (clobber
, 0)) == REG
2287 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address and return

   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST. */
2305 insert_hbrp_for_ilb_runout (rtx_insn
*first
)
2307 rtx_insn
*insn
, *before_4
= 0, *before_16
= 0;
2308 int addr
= 0, length
, first_addr
= -1;
2309 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2310 int insert_lnop_after
= 0;
2311 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2314 if (first_addr
== -1)
2315 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2316 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2317 length
= get_attr_length (insn
);
2319 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2321 /* We test for 14 instructions because the first hbrp will add
2322 up to 2 instructions. */
2323 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2326 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2328 /* Make sure an hbrp is at least 2 cycles away from a hint.
2329 Insert an lnop after the hbrp when necessary. */
2330 if (before_4
== 0 && addr
> 0)
2333 insert_lnop_after
|= 1;
2335 else if (before_4
&& addr
<= 4 * 4)
2336 insert_lnop_after
|= 1;
2337 if (before_16
== 0 && addr
> 10 * 4)
2340 insert_lnop_after
|= 2;
2342 else if (before_16
&& addr
<= 14 * 4)
2343 insert_lnop_after
|= 2;
2346 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2348 if (addr
< hbrp_addr0
)
2350 else if (addr
< hbrp_addr1
)
2354 if (CALL_P (insn
) || JUMP_P (insn
))
2356 if (HINTED_P (insn
))
2359 /* Any branch after the first 15 insns should be on an even
2360 address to avoid a special case branch. There might be
2361 some nops and/or hbrps inserted, so we test after 10
2364 SCHED_ON_EVEN_P (insn
) = 1;
2367 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2371 if (addr
+ length
>= 32 * 4)
2373 gcc_assert (before_4
&& before_16
);
2374 if (hbrp_addr0
> 4 * 4)
2377 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2378 recog_memoized (insn
);
2379 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2380 INSN_ADDRESSES_NEW (insn
,
2381 INSN_ADDRESSES (INSN_UID (before_4
)));
2382 PUT_MODE (insn
, GET_MODE (before_4
));
2383 PUT_MODE (before_4
, TImode
);
2384 if (insert_lnop_after
& 1)
2386 insn
= emit_insn_before (gen_lnop (), before_4
);
2387 recog_memoized (insn
);
2388 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2389 INSN_ADDRESSES_NEW (insn
,
2390 INSN_ADDRESSES (INSN_UID (before_4
)));
2391 PUT_MODE (insn
, TImode
);
2394 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2395 && hbrp_addr1
> 16 * 4)
2398 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2399 recog_memoized (insn
);
2400 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2401 INSN_ADDRESSES_NEW (insn
,
2402 INSN_ADDRESSES (INSN_UID (before_16
)));
2403 PUT_MODE (insn
, GET_MODE (before_16
));
2404 PUT_MODE (before_16
, TImode
);
2405 if (insert_lnop_after
& 2)
2407 insn
= emit_insn_before (gen_lnop (), before_16
);
2408 recog_memoized (insn
);
2409 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2410 INSN_ADDRESSES_NEW (insn
,
2411 INSN_ADDRESSES (INSN_UID
2413 PUT_MODE (insn
, TImode
);
2419 else if (BARRIER_P (insn
))
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target. */
2438 if (TARGET_SAFE_HINTS
)
2440 shorten_branches (get_insns ());
2441 /* Insert hbrp at beginning of function */
2442 insn
= next_active_insn (get_insns ());
2444 insert_hbrp_for_ilb_runout (insn
);
2445 /* Insert hbrp after hinted targets. */
2446 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2447 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2448 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2452 static int in_spu_reorg
;
2455 spu_var_tracking (void)
2457 if (flag_var_tracking
)
2460 timevar_push (TV_VAR_TRACKING
);
2461 variable_tracking_main ();
2462 timevar_pop (TV_VAR_TRACKING
);
2463 df_finish_pass (false);
2467 /* Insert branch hints. There are no branch optimizations after this
2468 pass, so it's safe to set our branch hints now. */
2470 spu_machine_dependent_reorg (void)
2474 rtx_insn
*branch
, *insn
;
2475 rtx branch_target
= 0;
2476 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2480 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2482 /* We still do it for unoptimized code because an external
2483 function might have hinted a call or return. */
2484 compute_bb_for_insn ();
2487 spu_var_tracking ();
2488 free_bb_for_insn ();
2492 blocks
= sbitmap_alloc (last_basic_block_for_fn (cfun
));
2493 bitmap_clear (blocks
);
2496 compute_bb_for_insn ();
2498 /* (Re-)discover loops so that bb->loop_father can be used
2499 in the analysis below. */
2500 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2505 (struct spu_bb_info
*) xcalloc (n_basic_blocks_for_fn (cfun
),
2506 sizeof (struct spu_bb_info
));
2508 /* We need exact insn addresses and lengths. */
2509 shorten_branches (get_insns ());
2511 for (i
= n_basic_blocks_for_fn (cfun
) - 1; i
>= 0; i
--)
2513 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2515 if (spu_bb_info
[i
].prop_jump
)
2517 branch
= spu_bb_info
[i
].prop_jump
;
2518 branch_target
= get_branch_target (branch
);
2519 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2520 required_dist
= spu_hint_dist
;
2522 /* Search from end of a block to beginning. In this loop, find
2523 jumps which need a branch and emit them only when:
2524 - it's an indirect branch and we're at the insn which sets
2526 - we're at an insn that will invalidate the hint. e.g., a
2527 call, another hint insn, inline asm that clobbers $hbr, and
2528 some inlined operations (divmodsi4). Don't consider jumps
2529 because they are only at the end of a block and are
2530 considered when we are deciding whether to propagate
2531 - we're getting too far away from the branch. The hbr insns
2532 only have a signed 10 bit offset
2533 We go back as far as possible so the branch will be considered
2534 for propagation when we get to the beginning of the block. */
2535 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2539 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2541 && ((GET_CODE (branch_target
) == REG
2542 && set_of (branch_target
, insn
) != NULL_RTX
)
2543 || insn_clobbers_hbr (insn
)
2544 || branch_addr
- insn_addr
> 600))
2546 rtx_insn
*next
= NEXT_INSN (insn
);
2547 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2548 if (insn
!= BB_END (bb
)
2549 && branch_addr
- next_addr
>= required_dist
)
2553 "hint for %i in block %i before %i\n",
2554 INSN_UID (branch
), bb
->index
,
2556 spu_emit_branch_hint (next
, branch
, branch_target
,
2557 branch_addr
- next_addr
, blocks
);
2562 /* JUMP_P will only be true at the end of a block. When
2563 branch is already set it means we've previously decided
2564 to propagate a hint for that branch into this block. */
2565 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2568 if ((branch_target
= get_branch_target (insn
)))
2571 branch_addr
= insn_addr
;
2572 required_dist
= spu_hint_dist
;
2576 if (insn
== BB_HEAD (bb
))
2582 /* If we haven't emitted a hint for this branch yet, it might
2583 be profitable to emit it in one of the predecessor blocks,
2584 especially for loops. */
2586 basic_block prev
= 0, prop
= 0, prev2
= 0;
2587 int loop_exit
= 0, simple_loop
= 0;
2588 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2590 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2591 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2592 prev
= EDGE_PRED (bb
, j
)->src
;
2594 prev2
= EDGE_PRED (bb
, j
)->src
;
2596 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2597 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2599 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2602 /* If this branch is a loop exit then propagate to previous
2603 fallthru block. This catches the cases when it is a simple
2604 loop or when there is an initial branch into the loop. */
2605 if (prev
&& (loop_exit
|| simple_loop
)
2606 && bb_loop_depth (prev
) <= bb_loop_depth (bb
))
          /* If there is only one adjacent predecessor, don't propagate
             outside this loop.  */
2611 else if (prev
&& single_pred_p (bb
)
2612 && prev
->loop_father
== bb
->loop_father
)
2615 /* If this is the JOIN block of a simple IF-THEN then
2616 propagate the hint to the HEADER block. */
2617 else if (prev
&& prev2
2618 && EDGE_COUNT (bb
->preds
) == 2
2619 && EDGE_COUNT (prev
->preds
) == 1
2620 && EDGE_PRED (prev
, 0)->src
== prev2
2621 && prev2
->loop_father
== bb
->loop_father
2622 && GET_CODE (branch_target
) != REG
)
2625 /* Don't propagate when:
2626 - this is a simple loop and the hint would be too far
2627 - this is not a simple loop and there are 16 insns in
2629 - the predecessor block ends in a branch that will be
2631 - the predecessor block ends in an insn that invalidates
2635 && (bbend
= BB_END (prop
))
2636 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2637 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2638 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2641 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2642 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2643 bb
->index
, prop
->index
, bb_loop_depth (bb
),
2644 INSN_UID (branch
), loop_exit
, simple_loop
,
2645 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2647 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2648 spu_bb_info
[prop
->index
].bb_index
= i
;
2650 else if (branch_addr
- next_addr
>= required_dist
)
2653 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2654 INSN_UID (branch
), bb
->index
,
2655 INSN_UID (NEXT_INSN (insn
)));
2656 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2657 branch_addr
- next_addr
, blocks
);
2664 if (!bitmap_empty_p (blocks
))
2665 find_many_sub_basic_blocks (blocks
);
2667 /* We have to schedule to make sure alignment is ok. */
2668 FOR_EACH_BB_FN (bb
, cfun
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2670 /* The hints need to be scheduled, so call it again. */
2672 df_finish_pass (true);
2678 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2679 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
        /* Adjust the LABEL_REF in a hint when we have inserted a nop
           between its branch label and the branch.  We don't move the
           label because GCC expects it at the beginning of the block.  */
2684 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2685 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2686 rtx_insn
*label
= as_a
<rtx_insn
*> (XEXP (label_ref
, 0));
2689 for (branch
= NEXT_INSN (label
);
2690 !JUMP_P (branch
) && !CALL_P (branch
);
2691 branch
= NEXT_INSN (branch
))
2692 if (NONJUMP_INSN_P (branch
))
2693 offset
+= get_attr_length (branch
);
2695 XVECEXP (unspec
, 0, 0) = plus_constant (Pmode
, label_ref
, offset
);
2698 spu_var_tracking ();
2700 loop_optimizer_finalize ();
2702 free_bb_for_insn ();
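
/* Illustration only (hypothetical helper, mirroring the distance checks made
   while walking each block in spu_machine_dependent_reorg above): a pending
   hint is flushed at the current point once something would invalidate it or
   the branch is getting too far away for the hbr's signed 10-bit offset, but
   only if the chosen insn is still far enough ahead of the branch to satisfy
   the required hint distance.  */
static int
example_flush_pending_hint_p (int dist_to_branch, int required_dist,
                              int invalidated)
{
  if (!invalidated && dist_to_branch <= 600)
    return 0;                           /* keep scanning; no need to flush yet */
  return dist_to_branch >= required_dist;  /* emit only if far enough ahead */
}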
/* Insn scheduling routines, primarily for dual issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
          || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}
2727 get_pipe (rtx_insn
*insn
)
2730 /* Handle inline asm */
2731 if (INSN_CODE (insn
) == -1)
2733 t
= get_attr_type (insn
);
2758 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
2795 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2796 int max_ready ATTRIBUTE_UNUSED
)
2798 spu_sched_length
= 0;
2802 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2803 int max_ready ATTRIBUTE_UNUSED
)
2805 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2807 /* When any block might be at least 8-byte aligned, assume they
2808 will all be at least 8-byte aligned to make sure dual issue
2809 works out correctly. */
2810 spu_sched_length
= 0;
2812 spu_ls_first
= INT_MAX
;
2817 prev_clock_var
= -1;
2822 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2823 int verbose ATTRIBUTE_UNUSED
,
2824 rtx_insn
*insn
, int more
)
2828 if (GET_CODE (PATTERN (insn
)) == USE
2829 || GET_CODE (PATTERN (insn
)) == CLOBBER
2830 || (len
= get_attr_length (insn
)) == 0)
2833 spu_sched_length
+= len
;
2835 /* Reset on inline asm */
2836 if (INSN_CODE (insn
) == -1)
2838 spu_ls_first
= INT_MAX
;
2843 p
= get_pipe (insn
);
2845 pipe0_clock
= clock_var
;
2847 pipe1_clock
= clock_var
;
2851 if (clock_var
- prev_ls_clock
> 1
2852 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2853 spu_ls_first
= INT_MAX
;
2854 if (uses_ls_unit (insn
))
2856 if (spu_ls_first
== INT_MAX
)
2857 spu_ls_first
= spu_sched_length
;
2858 prev_ls_clock
= clock_var
;
2861 /* The scheduler hasn't inserted the nop, but we will later on.
2862 Include those nops in spu_sched_length. */
2863 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2864 spu_sched_length
+= 4;
2865 prev_clock_var
= clock_var
;
2867 /* more is -1 when called from spu_sched_reorder for new insns
2868 that don't have INSN_PRIORITY */
2870 prev_priority
= INSN_PRIORITY (insn
);
2873 /* Always try issuing more insns. spu_sched_reorder will decide
2874 when the cycle should be advanced. */
2878 /* This function is called for both TARGET_SCHED_REORDER and
2879 TARGET_SCHED_REORDER2. */
2881 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2882 rtx_insn
**ready
, int *nreadyp
, int clock
)
2884 int i
, nready
= *nreadyp
;
2885 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2890 if (nready
<= 0 || pipe1_clock
>= clock
)
2893 /* Find any rtl insns that don't generate assembly insns and schedule
2895 for (i
= nready
- 1; i
>= 0; i
--)
2898 if (INSN_CODE (insn
) == -1
2899 || INSN_CODE (insn
) == CODE_FOR_blockage
2900 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2902 ready
[i
] = ready
[nready
- 1];
2903 ready
[nready
- 1] = insn
;
2908 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2909 for (i
= 0; i
< nready
; i
++)
2910 if (INSN_CODE (ready
[i
]) != -1)
2913 switch (get_attr_type (insn
))
2938 case TYPE_IPREFETCH
:
2944 /* In the first scheduling phase, schedule loads and stores together
2945 to increase the chance they will get merged during postreload CSE. */
2946 if (!reload_completed
&& pipe_ls
>= 0)
2948 insn
= ready
[pipe_ls
];
2949 ready
[pipe_ls
] = ready
[nready
- 1];
2950 ready
[nready
- 1] = insn
;
2954 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2958 /* When we have loads/stores in every cycle of the last 15 insns and
2959 we are about to schedule another load/store, emit an hbrp insn
2962 && spu_sched_length
- spu_ls_first
>= 4 * 15
2963 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2965 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2966 recog_memoized (insn
);
2967 if (pipe0_clock
< clock
)
2968 PUT_MODE (insn
, TImode
);
2969 spu_sched_variable_issue (file
, verbose
, insn
, -1);
  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect, so -mdual-nops=n can be
     used to adjust it.  */
2978 if (in_spu_reorg
&& spu_dual_nops
< 10)
2980 /* When we are at an even address and we are not issuing nops to
2981 improve scheduling then we need to advance the cycle. */
2982 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2983 && (spu_dual_nops
== 0
2986 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2989 /* When at an odd address, schedule the highest priority insn
2990 without considering pipeline. */
2991 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2992 && (spu_dual_nops
== 0
2994 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2999 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3000 pipe0 insn in the ready list, schedule it. */
3001 if (pipe0_clock
< clock
&& pipe_0
>= 0)
3002 schedule_i
= pipe_0
;
3004 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3005 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3007 schedule_i
= pipe_1
;
3009 if (schedule_i
> -1)
3011 insn
= ready
[schedule_i
];
3012 ready
[schedule_i
] = ready
[nready
- 1];
3013 ready
[nready
- 1] = insn
;
3019 /* INSN is dependent on DEP_INSN. */
3021 spu_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
3025 /* The blockage pattern is used to prevent instructions from being
3026 moved across it and has no cost. */
3027 if (INSN_CODE (insn
) == CODE_FOR_blockage
3028 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3031 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3032 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3035 /* Make sure hbrps are spread out. */
3036 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3037 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3040 /* Make sure hints and hbrps are 2 cycles apart. */
3041 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3042 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3043 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3044 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3047 /* An hbrp has no real dependency on other insns. */
3048 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3049 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3052 /* Assuming that it is unlikely an argument register will be used in
3053 the first cycle of the called function, we reduce the cost for
3054 slightly better scheduling of dep_insn. When not hinted, the
3055 mispredicted branch would hide the cost as well. */
3058 rtx target
= get_branch_target (insn
);
3059 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3064 /* And when returning from a function, let's assume the return values
3065 are completed sooner too. */
3066 if (CALL_P (dep_insn
))
  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued.  */
3072 if (INSN_CODE (insn
) == CODE_FOR__return
3073 && (set
= single_set (dep_insn
))
3074 && GET_CODE (SET_DEST (set
)) == REG
3075 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3078 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3079 scheduler makes every insn in a block anti-dependent on the final
3080 jump_insn. We adjust here so higher cost insns will get scheduled
3082 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3083 return insn_cost (dep_insn
) - 3;
/* Create a CONST_DOUBLE from a string.  */
rtx
spu_float_const (const char *string, machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
          || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
          || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;
  return SPU_NONE;
}
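
/* Illustration only (hypothetical helper, not part of the port): a plain
   integer restatement of the ranges checked above, assuming the usual
   il/ila/ilh/ilhu semantics.  A sign-extended SImode value can be
   materialized in one instruction when it fits a 16-bit signed immediate
   (il), an 18-bit unsigned immediate (ila), repeats the same halfword in
   both halves (ilh), or has a zero low halfword (ilhu); anything else needs
   the two-instruction ilhu/iohl sequence.  */
static int
example_single_insn_load_p (long long val)
{
  if (val >= -0x8000 && val <= 0x7fff)
    return 1;                           /* il */
  if (val >= 0 && val <= 0x3ffff)
    return 1;                           /* ila */
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return 1;                           /* ilh */
  if ((val & 0xffff) == 0)
    return 1;                           /* ilhu alone suffices */
  return 0;                             /* needs ilhu + iohl */
}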
3122 /* Return true when OP can be loaded by one of the il instructions, or
3123 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3125 immediate_load_p (rtx op
, machine_mode mode
)
3127 if (CONSTANT_P (op
))
3129 enum immediate_class c
= classify_immediate (op
, mode
);
3130 return c
== IC_IL1
|| c
== IC_IL1s
3131 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
3136 /* Return true if the first SIZE bytes of arr is a constant that can be
3137 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3138 represent the size and offset of the instruction to use. */
3140 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3142 int cpat
, run
, i
, start
;
3146 for (i
= 0; i
< size
&& cpat
; i
++)
3154 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3156 else if (arr
[i
] == 0)
3158 while (arr
[i
+run
] == run
&& i
+run
< 16)
3160 if (run
!= 4 && run
!= 8)
3165 if ((i
& (run
-1)) != 0)
3172 if (cpat
&& (run
|| size
< 16))
3179 *pstart
= start
== -1 ? 16-run
: start
;
3185 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3186 it into a register. MODE is only valid when OP is a CONST_INT. */
3187 static enum immediate_class
3188 classify_immediate (rtx op
, machine_mode mode
)
3191 unsigned char arr
[16];
3192 int i
, j
, repeated
, fsmbi
, repeat
;
3194 gcc_assert (CONSTANT_P (op
));
3196 if (GET_MODE (op
) != VOIDmode
)
3197 mode
= GET_MODE (op
);
3199 /* A V4SI const_vector with all identical symbols is ok. */
3202 && GET_CODE (op
) == CONST_VECTOR
3203 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3205 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3206 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3207 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3208 op
= CONST_VECTOR_ELT (op
, 0);
3210 switch (GET_CODE (op
))
3214 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3217 /* We can never know if the resulting address fits in 18 bits and can be
3218 loaded with ila. For now, assume the address will not overflow if
3219 the displacement is "small" (fits 'K' constraint). */
3220 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3222 rtx sym
= XEXP (XEXP (op
, 0), 0);
3223 rtx cst
= XEXP (XEXP (op
, 0), 1);
3225 if (GET_CODE (sym
) == SYMBOL_REF
3226 && GET_CODE (cst
) == CONST_INT
3227 && satisfies_constraint_K (cst
))
3236 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3237 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3238 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3244 constant_to_array (mode
, op
, arr
);
3246 /* Check that each 4-byte slot is identical. */
3248 for (i
= 4; i
< 16; i
+= 4)
3249 for (j
= 0; j
< 4; j
++)
3250 if (arr
[j
] != arr
[i
+ j
])
3255 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3256 val
= trunc_int_for_mode (val
, SImode
);
3258 if (which_immediate_load (val
) != SPU_NONE
)
3262 /* Any mode of 2 bytes or smaller can be loaded with an il
3264 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3268 for (i
= 0; i
< 16 && fsmbi
; i
++)
3269 if (arr
[i
] != 0 && repeat
== 0)
3271 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3274 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3276 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3289 static enum spu_immediate
3290 which_logical_immediate (HOST_WIDE_INT val
)
3292 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3294 if (val
>= -0x200 && val
<= 0x1ff)
3296 if (val
>= 0 && val
<= 0xffff)
3298 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3300 val
= trunc_int_for_mode (val
, HImode
);
3301 if (val
>= -0x200 && val
<= 0x1ff)
3303 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3305 val
= trunc_int_for_mode (val
, QImode
);
3306 if (val
>= -0x200 && val
<= 0x1ff)
3313 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3316 const_vector_immediate_p (rtx x
)
3319 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3320 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3321 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3322 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3328 logical_immediate_p (rtx op
, machine_mode mode
)
3331 unsigned char arr
[16];
3334 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3335 || GET_CODE (op
) == CONST_VECTOR
);
3337 if (GET_CODE (op
) == CONST_VECTOR
3338 && !const_vector_immediate_p (op
))
3341 if (GET_MODE (op
) != VOIDmode
)
3342 mode
= GET_MODE (op
);
3344 constant_to_array (mode
, op
, arr
);
3346 /* Check that bytes are repeated. */
3347 for (i
= 4; i
< 16; i
+= 4)
3348 for (j
= 0; j
< 4; j
++)
3349 if (arr
[j
] != arr
[i
+ j
])
3352 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3353 val
= trunc_int_for_mode (val
, SImode
);
3355 i
= which_logical_immediate (val
);
3356 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3360 iohl_immediate_p (rtx op
, machine_mode mode
)
3363 unsigned char arr
[16];
3366 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3367 || GET_CODE (op
) == CONST_VECTOR
);
3369 if (GET_CODE (op
) == CONST_VECTOR
3370 && !const_vector_immediate_p (op
))
3373 if (GET_MODE (op
) != VOIDmode
)
3374 mode
= GET_MODE (op
);
3376 constant_to_array (mode
, op
, arr
);
3378 /* Check that bytes are repeated. */
3379 for (i
= 4; i
< 16; i
+= 4)
3380 for (j
= 0; j
< 4; j
++)
3381 if (arr
[j
] != arr
[i
+ j
])
3384 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3385 val
= trunc_int_for_mode (val
, SImode
);
3387 return val
>= 0 && val
<= 0xffff;
3391 arith_immediate_p (rtx op
, machine_mode mode
,
3392 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3395 unsigned char arr
[16];
3398 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3399 || GET_CODE (op
) == CONST_VECTOR
);
3401 if (GET_CODE (op
) == CONST_VECTOR
3402 && !const_vector_immediate_p (op
))
3405 if (GET_MODE (op
) != VOIDmode
)
3406 mode
= GET_MODE (op
);
3408 constant_to_array (mode
, op
, arr
);
3410 if (VECTOR_MODE_P (mode
))
3411 mode
= GET_MODE_INNER (mode
);
3413 bytes
= GET_MODE_SIZE (mode
);
3414 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3416 /* Check that bytes are repeated. */
3417 for (i
= bytes
; i
< 16; i
+= bytes
)
3418 for (j
= 0; j
< bytes
; j
++)
3419 if (arr
[j
] != arr
[i
+ j
])
3423 for (j
= 1; j
< bytes
; j
++)
3424 val
= (val
<< 8) | arr
[j
];
3426 val
= trunc_int_for_mode (val
, mode
);
3428 return val
>= low
&& val
<= high
;
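
/* Illustration only (hypothetical helper): the nested loops above accept a
   16-byte constant only when it is one element splatted across the whole
   quadword.  For example, the V8HImode vector {5,5,5,5,5,5,5,5} passes with
   bytes == 2, while {5,6,5,6,...} fails at the inner comparison.  */
static int
example_bytes_are_splatted (const unsigned char arr[16], int bytes)
{
  int i, j;
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;               /* byte differs from the first element */
  return 1;                     /* every element repeats arr[0..bytes-1] */
}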
3431 /* TRUE when op is an immediate and an exact power of 2, and given that
3432 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3433 all entries must be the same. */
3435 exp2_immediate_p (rtx op
, machine_mode mode
, int low
, int high
)
3437 machine_mode int_mode
;
3439 unsigned char arr
[16];
3442 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3443 || GET_CODE (op
) == CONST_VECTOR
);
3445 if (GET_CODE (op
) == CONST_VECTOR
3446 && !const_vector_immediate_p (op
))
3449 if (GET_MODE (op
) != VOIDmode
)
3450 mode
= GET_MODE (op
);
3452 constant_to_array (mode
, op
, arr
);
3454 if (VECTOR_MODE_P (mode
))
3455 mode
= GET_MODE_INNER (mode
);
3457 bytes
= GET_MODE_SIZE (mode
);
3458 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3460 /* Check that bytes are repeated. */
3461 for (i
= bytes
; i
< 16; i
+= bytes
)
3462 for (j
= 0; j
< bytes
; j
++)
3463 if (arr
[j
] != arr
[i
+ j
])
3467 for (j
= 1; j
< bytes
; j
++)
3468 val
= (val
<< 8) | arr
[j
];
3470 val
= trunc_int_for_mode (val
, int_mode
);
3472 /* Currently, we only handle SFmode */
3473 gcc_assert (mode
== SFmode
);
3476 int exp
= (val
>> 23) - 127;
3477 return val
> 0 && (val
& 0x007fffff) == 0
3478 && exp
>= low
&& exp
<= high
;
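
/* Illustration only (hypothetical helper): the SFmode test above works on
   the IEEE-754 bit pattern of the float.  For example 8.0f is 0x41000000:
   the mantissa bits are zero and the biased exponent field is 130, so it is
   an exact power of two with exponent 3.  */
static int
example_sf_exp2_in_range (unsigned int bits, int low, int high)
{
  int exp = (int) (bits >> 23) - 127;           /* unbias the exponent field */
  return bits > 0 && (bits & 0x007fffff) == 0   /* mantissa must be zero */
         && exp >= low && exp <= high;
}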
3483 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3486 ea_symbol_ref_p (const_rtx x
)
3490 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3492 rtx plus
= XEXP (x
, 0);
3493 rtx op0
= XEXP (plus
, 0);
3494 rtx op1
= XEXP (plus
, 1);
3495 if (GET_CODE (op1
) == CONST_INT
)
3499 return (GET_CODE (x
) == SYMBOL_REF
3500 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3501 && TREE_CODE (decl
) == VAR_DECL
3502 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)));
3506 - any 32-bit constant (SImode, SFmode)
3507 - any constant that can be generated with fsmbi (any mode)
3508 - a 64-bit constant where the high and low bits are identical
3510 - a 128-bit constant where the four 32-bit words match. */
3512 spu_legitimate_constant_p (machine_mode mode
, rtx x
)
3514 subrtx_iterator::array_type array
;
3515 if (GET_CODE (x
) == HIGH
)
3518 /* Reject any __ea qualified reference. These can't appear in
3519 instructions but must be forced to the constant pool. */
3520 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3521 if (ea_symbol_ref_p (*iter
))
3524 /* V4SI with all identical symbols is valid. */
3527 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3528 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3529 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3530 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3531 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3532 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3534 if (GET_CODE (x
) == CONST_VECTOR
3535 && !const_vector_immediate_p (x
))
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode.  */
3550 spu_legitimate_address_p (machine_mode mode
,
3551 rtx x
, bool reg_ok_strict
)
3553 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3555 && GET_CODE (x
) == AND
3556 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3557 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3559 switch (GET_CODE (x
))
3562 return !TARGET_LARGE_MEM
;
3566 /* Keep __ea references until reload so that spu_expand_mov can see them
3568 if (ea_symbol_ref_p (x
))
3569 return !reload_in_progress
&& !reload_completed
;
3570 return !TARGET_LARGE_MEM
;
3573 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3581 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3586 rtx op0
= XEXP (x
, 0);
3587 rtx op1
= XEXP (x
, 1);
3588 if (GET_CODE (op0
) == SUBREG
)
3589 op0
= XEXP (op0
, 0);
3590 if (GET_CODE (op1
) == SUBREG
)
3591 op1
= XEXP (op1
, 0);
3592 if (GET_CODE (op0
) == REG
3593 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3594 && GET_CODE (op1
) == CONST_INT
3595 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3596 /* If virtual registers are involved, the displacement will
3597 change later on anyway, so checking would be premature.
3598 Reload will make sure the final displacement after
3599 register elimination is OK. */
3600 || op0
== arg_pointer_rtx
3601 || op0
== frame_pointer_rtx
3602 || op0
== virtual_stack_vars_rtx
)
3603 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3605 if (GET_CODE (op0
) == REG
3606 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3607 && GET_CODE (op1
) == REG
3608 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
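
/* Illustration only (hypothetical helper): the reg + const_int case above
   accepts a 14-bit signed displacement (the d-form range), and for 16-byte
   accesses additionally requires a 16-byte-aligned displacement, because
   lqd/stqd silently ignore the 4 least significant bits of the constant.  */
static int
example_d_form_displacement_ok (long offset, int sixteen_byte_access)
{
  if (offset < -0x2000 || offset > 0x1fff)
    return 0;                   /* outside the signed 14-bit range */
  if (sixteen_byte_access && (offset & 15) != 0)
    return 0;                   /* lqd/stqd would mask these bits away */
  return 1;
}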
3619 /* Like spu_legitimate_address_p, except with named addresses. */
3621 spu_addr_space_legitimate_address_p (machine_mode mode
, rtx x
,
3622 bool reg_ok_strict
, addr_space_t as
)
3624 if (as
== ADDR_SPACE_EA
)
3625 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3627 else if (as
!= ADDR_SPACE_GENERIC
)
3630 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3633 /* When the address is reg + const_int, force the const_int into a
3636 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3637 machine_mode mode ATTRIBUTE_UNUSED
)
3640 /* Make sure both operands are registers. */
3641 if (GET_CODE (x
) == PLUS
)
3645 if (ALIGNED_SYMBOL_REF_P (op0
))
3647 op0
= force_reg (Pmode
, op0
);
3648 mark_reg_pointer (op0
, 128);
3650 else if (GET_CODE (op0
) != REG
)
3651 op0
= force_reg (Pmode
, op0
);
3652 if (ALIGNED_SYMBOL_REF_P (op1
))
3654 op1
= force_reg (Pmode
, op1
);
3655 mark_reg_pointer (op1
, 128);
3657 else if (GET_CODE (op1
) != REG
)
3658 op1
= force_reg (Pmode
, op1
);
3659 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3664 /* Like spu_legitimate_address, except with named address support. */
3666 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
,
3669 if (as
!= ADDR_SPACE_GENERIC
)
3672 return spu_legitimize_address (x
, oldx
, mode
);
3675 /* Reload reg + const_int for out-of-range displacements. */
3677 spu_legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
3678 int opnum
, int type
)
3680 bool removed_and
= false;
3682 if (GET_CODE (ad
) == AND
3683 && CONST_INT_P (XEXP (ad
, 1))
3684 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3690 if (GET_CODE (ad
) == PLUS
3691 && REG_P (XEXP (ad
, 0))
3692 && CONST_INT_P (XEXP (ad
, 1))
3693 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3694 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3696 /* Unshare the sum. */
3699 /* Reload the displacement. */
3700 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3701 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3702 opnum
, (enum reload_type
) type
);
3704 /* Add back AND for alignment if we stripped it. */
3706 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3714 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3715 struct attribute_spec.handler. */
3717 spu_handle_fndecl_attribute (tree
* node
,
3719 tree args ATTRIBUTE_UNUSED
,
3720 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3722 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3724 warning (0, "%qE attribute only applies to functions",
3726 *no_add_attrs
= true;
3732 /* Handle the "vector" attribute. */
3734 spu_handle_vector_attribute (tree
* node
, tree name
,
3735 tree args ATTRIBUTE_UNUSED
,
3736 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3738 tree type
= *node
, result
= NULL_TREE
;
3742 while (POINTER_TYPE_P (type
)
3743 || TREE_CODE (type
) == FUNCTION_TYPE
3744 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3745 type
= TREE_TYPE (type
);
3747 mode
= TYPE_MODE (type
);
3749 unsigned_p
= TYPE_UNSIGNED (type
);
3753 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3756 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3759 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3762 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3765 result
= V4SF_type_node
;
3768 result
= V2DF_type_node
;
3774 /* Propagate qualifiers attached to the element type
3775 onto the vector type. */
3776 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3777 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3779 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3782 warning (0, "%qE attribute ignored", name
);
3784 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3789 /* Return nonzero if FUNC is a naked function. */
3791 spu_naked_function_p (tree func
)
3795 if (TREE_CODE (func
) != FUNCTION_DECL
)
3798 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3799 return a
!= NULL_TREE
;
3803 spu_initial_elimination_offset (int from
, int to
)
3805 int saved_regs_size
= spu_saved_regs_size ();
3807 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3808 || get_frame_size () || saved_regs_size
)
3809 sp_offset
= STACK_POINTER_OFFSET
;
3810 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3811 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3812 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3813 return get_frame_size ();
3814 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3815 return sp_offset
+ crtl
->outgoing_args_size
3816 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3817 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3818 return get_frame_size () + saved_regs_size
+ sp_offset
;
3824 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3826 machine_mode mode
= TYPE_MODE (type
);
3827 int byte_size
= ((mode
== BLKmode
)
3828 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3830 /* Make sure small structs are left justified in a register. */
3831 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3832 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3837 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3838 int n
= byte_size
/ UNITS_PER_WORD
;
3839 v
= rtvec_alloc (nregs
);
3840 for (i
= 0; i
< n
; i
++)
3842 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3843 gen_rtx_REG (TImode
,
3846 GEN_INT (UNITS_PER_WORD
* i
));
3847 byte_size
-= UNITS_PER_WORD
;
3855 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3857 gen_rtx_EXPR_LIST (VOIDmode
,
3858 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3859 GEN_INT (UNITS_PER_WORD
* n
));
3861 return gen_rtx_PARALLEL (mode
, v
);
3863 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3867 spu_function_arg (cumulative_args_t cum_v
,
3869 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3871 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3874 if (*cum
>= MAX_REGISTER_ARGS
)
3877 byte_size
= ((mode
== BLKmode
)
3878 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3880 /* The ABI does not allow parameters to be passed partially in
3881 reg and partially in stack. */
3882 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3885 /* Make sure small structs are left justified in a register. */
3886 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3887 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3893 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3894 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3895 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3897 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3900 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3904 spu_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
3905 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3907 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3909 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3912 ? ((int_size_in_bytes (type
) + 15) / 16)
3915 : HARD_REGNO_NREGS (cum
, mode
));
3918 /* Variable sized types are passed by reference. */
3920 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
3921 machine_mode mode ATTRIBUTE_UNUSED
,
3922 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3924 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
          void *__args __attribute__((__aligned(16)));
          void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes.  */
3945 spu_build_builtin_va_list (void)
3947 tree f_args
, f_skip
, record
, type_decl
;
3950 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3953 build_decl (BUILTINS_LOCATION
,
3954 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3956 f_args
= build_decl (BUILTINS_LOCATION
,
3957 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3958 f_skip
= build_decl (BUILTINS_LOCATION
,
3959 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3961 DECL_FIELD_CONTEXT (f_args
) = record
;
3962 DECL_ALIGN (f_args
) = 128;
3963 DECL_USER_ALIGN (f_args
) = 1;
3965 DECL_FIELD_CONTEXT (f_skip
) = record
;
3966 DECL_ALIGN (f_skip
) = 128;
3967 DECL_USER_ALIGN (f_skip
) = 1;
3969 TYPE_STUB_DECL (record
) = type_decl
;
3970 TYPE_NAME (record
) = type_decl
;
3971 TYPE_FIELDS (record
) = f_args
;
3972 DECL_CHAIN (f_args
) = f_skip
;
  /* We know this is being padded and we want it that way.  It is an
     internal type, so hide the warnings from the user.  */
3977 warn_padded
= false;
3979 layout_type (record
);
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record
, build_index_type (size_zero_node
));
3987 /* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3990 The following global variables are used to initialize
3991 the va_list structure:
3994 the CUMULATIVE_ARGS for this function
3996 crtl->args.arg_offset_rtx:
3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
4001 spu_va_start (tree valist
, rtx nextarg
)
4003 tree f_args
, f_skip
;
4006 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4007 f_skip
= DECL_CHAIN (f_args
);
4009 valist
= build_simple_mem_ref (valist
);
4011 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4013 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4015 /* Find the __args area. */
4016 t
= make_tree (TREE_TYPE (args
), nextarg
);
4017 if (crtl
->args
.pretend_args_size
> 0)
4018 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
4019 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4020 TREE_SIDE_EFFECTS (t
) = 1;
4021 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4023 /* Find the __skip area. */
4024 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4025 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
4026 - STACK_POINTER_OFFSET
));
4027 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4028 TREE_SIDE_EFFECTS (t
) = 1;
4029 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
        && VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
4050 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4051 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4053 tree f_args
, f_skip
;
4055 HOST_WIDE_INT size
, rsize
;
4057 bool pass_by_reference_p
;
4059 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4060 f_skip
= DECL_CHAIN (f_args
);
4062 valist
= build_simple_mem_ref (valist
);
4064 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4066 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4068 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4070 /* if an object is dynamically sized, a pointer to it is passed
4071 instead of the object itself. */
4072 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4074 if (pass_by_reference_p
)
4075 type
= build_pointer_type (type
);
4076 size
= int_size_in_bytes (type
);
4077 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4079 /* build conditional expression to calculate addr. The expression
4080 will be gimplified later. */
4081 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4082 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4083 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4084 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4085 unshare_expr (skip
)));
4087 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4088 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4089 unshare_expr (args
));
4091 gimplify_assign (addr
, tmp
, pre_p
);
4093 /* update VALIST.__args */
4094 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4095 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4097 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4100 if (pass_by_reference_p
)
4101 addr
= build_va_arg_indirect_ref (addr
);
4103 return build_va_arg_indirect_ref (addr
);
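
/* Illustration only (hypothetical, a user-level view of the expansion
   described above): each argument slot is padded to a quadword, and the
   comparison against __skip makes __args jump ahead by 32 bytes exactly
   once, when it reaches the previous stack frame.  */
static void *
example_va_arg_step (void **args_p, void *skip, unsigned long paddedsize)
{
  char *addr;
  if ((char *) *args_p + paddedsize > (char *) skip
      && (char *) *args_p <= (char *) skip)
    addr = (char *) skip + 32;          /* crossing __skip: jump the gap */
  else
    addr = (char *) *args_p;
  *args_p = addr + paddedsize;          /* advance __args for the next va_arg */
  return addr;                          /* the argument lives at ADDR */
}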
4106 /* Save parameter registers starting with the register that corresponds
4107 to the first unnamed parameters. If the first unnamed parameter is
4108 in the stack then save no registers. Set pretend_args_size to the
4109 amount of space needed to save the registers. */
4111 spu_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4112 tree type
, int *pretend_size
, int no_rtl
)
4119 int ncum
= *get_cumulative_args (cum
);
4121 /* cum currently points to the last named argument, we want to
4122 start at the next argument. */
4123 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4125 offset
= -STACK_POINTER_OFFSET
;
4126 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4128 tmp
= gen_frame_mem (V4SImode
,
4129 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4131 emit_move_insn (tmp
,
4132 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4135 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4140 spu_conditional_register_usage (void)
4144 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4145 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4149 /* This is called any time we inspect the alignment of a register for
4152 reg_aligned_for_addr (rtx x
)
4155 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4156 return REGNO_POINTER_ALIGN (regno
) >= 128;
4159 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4160 into its SYMBOL_REF_FLAGS. */
4162 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4164 default_encode_section_info (decl
, rtl
, first
);
4166 /* If a variable has a forced alignment to < 16 bytes, mark it with
4167 SYMBOL_FLAG_ALIGN1. */
4168 if (TREE_CODE (decl
) == VAR_DECL
4169 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4170 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4173 /* Return TRUE if we are certain the mem refers to a complete object
4174 which is both 16-byte aligned and padded to a 16-byte boundary. This
4175 would make it safe to store with a single instruction.
4176 We guarantee the alignment and padding for static objects by aligning
4177 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4178 FIXME: We currently cannot guarantee this for objects on the stack
4179 because assign_parm_setup_stack calls assign_stack_local with the
4180 alignment of the parameter mode and in that case the alignment never
4181 gets adjusted by LOCAL_ALIGNMENT. */
4183 store_with_one_insn_p (rtx mem
)
4185 machine_mode mode
= GET_MODE (mem
);
4186 rtx addr
= XEXP (mem
, 0);
4187 if (mode
== BLKmode
)
4189 if (GET_MODE_SIZE (mode
) >= 16)
4191 /* Only static objects. */
4192 if (GET_CODE (addr
) == SYMBOL_REF
)
4194 /* We use the associated declaration to make sure the access is
4195 referring to the whole object.
4196 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4197 if it is necessary. Will there be cases where one exists, and
4198 the other does not? Will there be cases where both exist, but
4199 have different types? */
4200 tree decl
= MEM_EXPR (mem
);
4202 && TREE_CODE (decl
) == VAR_DECL
4203 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4205 decl
= SYMBOL_REF_DECL (addr
);
4207 && TREE_CODE (decl
) == VAR_DECL
4208 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
/* Return 1 when the address is not valid for a simple load and store as
   required by the '_mov*' patterns.  We could make this less strict
   for loads, but we prefer MEMs to look the same so they are more
   likely to be merged.  */
4219 address_needs_split (rtx mem
)
4221 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4222 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4223 || !(store_with_one_insn_p (mem
)
4224 || mem_is_padded_component_ref (mem
))))
4230 static GTY(()) rtx cache_fetch
; /* __cache_fetch function */
4231 static GTY(()) rtx cache_fetch_dirty
; /* __cache_fetch_dirty function */
4232 static alias_set_type ea_alias_set
= -1; /* alias set for __ea memory */
4234 /* MEM is known to be an __ea qualified memory access. Emit a call to
4235 fetch the ppu memory to local store, and return its address in local
4239 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4243 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4244 if (!cache_fetch_dirty
)
4245 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4246 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4247 2, ea_addr
, EAmode
, ndirty
, SImode
);
4252 cache_fetch
= init_one_libfunc ("__cache_fetch");
4253 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4254 1, ea_addr
, EAmode
);
/* Like ea_load_store, but do the cache tag comparison and, for stores,
   dirty bit marking, inline.

   The cache control data structure is an array of

      struct __cache_tag_array
        {
          unsigned int tag_lo[4];
          unsigned int tag_hi[4];
          void *data_pointer[4];
          vector unsigned short dirty_bits[4];
        }  */
4273 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4277 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4278 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4279 rtx index_mask
= gen_reg_rtx (SImode
);
4280 rtx tag_arr
= gen_reg_rtx (Pmode
);
4281 rtx splat_mask
= gen_reg_rtx (TImode
);
4282 rtx splat
= gen_reg_rtx (V4SImode
);
4283 rtx splat_hi
= NULL_RTX
;
4284 rtx tag_index
= gen_reg_rtx (Pmode
);
4285 rtx block_off
= gen_reg_rtx (SImode
);
4286 rtx tag_addr
= gen_reg_rtx (Pmode
);
4287 rtx tag
= gen_reg_rtx (V4SImode
);
4288 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4289 rtx cache_tag_hi
= NULL_RTX
;
4290 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4291 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4292 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4293 rtx tag_equal_hi
= NULL_RTX
;
4294 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4295 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4296 rtx eq_index
= gen_reg_rtx (SImode
);
4297 rtx bcomp
, hit_label
, hit_ref
, cont_label
;
4300 if (spu_ea_model
!= 32)
4302 splat_hi
= gen_reg_rtx (V4SImode
);
4303 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4304 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4307 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4308 emit_move_insn (tag_arr
, tag_arr_sym
);
4309 v
= 0x0001020300010203LL
;
4310 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4311 ea_addr_si
= ea_addr
;
4312 if (spu_ea_model
!= 32)
4313 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4315 /* tag_index = ea_addr & (tag_array_size - 128) */
4316 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4318 /* splat ea_addr to all 4 slots. */
4319 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4320 /* Similarly for high 32 bits of ea_addr. */
4321 if (spu_ea_model
!= 32)
4322 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4324 /* block_off = ea_addr & 127 */
4325 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4327 /* tag_addr = tag_arr + tag_index */
4328 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4330 /* Read cache tags. */
4331 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4332 if (spu_ea_model
!= 32)
4333 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4334 plus_constant (Pmode
,
4337 /* tag = ea_addr & -128 */
4338 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4340 /* Read all four cache data pointers. */
4341 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4342 plus_constant (Pmode
,
4346 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4347 if (spu_ea_model
!= 32)
4349 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4350 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4353 /* At most one of the tags compare equal, so tag_equal has one
4354 32-bit slot set to all 1's, with the other slots all zero.
4355 gbb picks off low bit from each byte in the 128-bit registers,
4356 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4358 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4359 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4361 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4362 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4364 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4365 (rotating eq_index mod 16 bytes). */
4366 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4367 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4369 /* Add block offset to form final data address. */
4370 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4372 /* Check that we did hit. */
4373 hit_label
= gen_label_rtx ();
4374 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4375 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4376 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4377 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4379 /* Say that this branch is very likely to happen. */
4380 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4381 add_int_reg_note (insn
, REG_BR_PROB
, v
);
4383 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4384 cont_label
= gen_label_rtx ();
4385 emit_jump_insn (gen_jump (cont_label
));
4388 emit_label (hit_label
);
4393 rtx dirty_bits
= gen_reg_rtx (TImode
);
4394 rtx dirty_off
= gen_reg_rtx (SImode
);
4395 rtx dirty_128
= gen_reg_rtx (TImode
);
4396 rtx neg_block_off
= gen_reg_rtx (SImode
);
4398 /* Set up mask with one dirty bit per byte of the mem we are
4399 writing, starting from top bit. */
4401 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4402 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4407 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4409 /* Form index into cache dirty_bits. eq_index is one of
4410 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4411 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4412 offset to each of the four dirty_bits elements. */
4413 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4415 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4417 /* Rotate bit mask to proper bit. */
4418 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4419 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4420 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4422 /* Or in the new dirty bits. */
4423 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4426 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4429 emit_label (cont_label
);
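
/* Illustration only (hypothetical helper, simplified to the 32-bit __ea
   model and omitting the dirty-bit handling; the struct here keeps only the
   fields the lookup needs): in plain C terms, the RTL emitted above performs
   roughly this 4-way tag lookup in the software cache, falling back to the
   __cache_fetch call on a miss.  */
struct example_cache_set
{
  unsigned int tag_lo[4];
  unsigned int tag_hi[4];
  void *data_pointer[4];
};

static void *
example_ea_lookup (char *tag_array, unsigned int tag_array_size,
                   unsigned int ea)
{
  struct example_cache_set *set
    = (struct example_cache_set *) (tag_array + (ea & (tag_array_size - 128)));
  unsigned int tag = ea & ~127u;        /* 128-byte cache lines */
  unsigned int block_off = ea & 127;
  int i;

  for (i = 0; i < 4; i++)               /* the ceq/gbb/clz sequence above */
    if (set->tag_lo[i] == tag)
      return (char *) set->data_pointer[i] + block_off;  /* hit */
  return 0;                             /* miss: caller invokes __cache_fetch */
}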
4433 expand_ea_mem (rtx mem
, bool is_store
)
4436 rtx data_addr
= gen_reg_rtx (Pmode
);
4439 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4440 if (optimize_size
|| optimize
== 0)
4441 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4443 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4445 if (ea_alias_set
== -1)
4446 ea_alias_set
= new_alias_set ();
4448 /* We generate a new MEM RTX to refer to the copy of the data
4449 in the cache. We do not copy memory attributes (except the
4450 alignment) from the original MEM, as they may no longer apply
4451 to the cache copy. */
4452 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4453 set_mem_alias_set (new_mem
, ea_alias_set
);
4454 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
4460 spu_expand_mov (rtx
* ops
, machine_mode mode
)
4462 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4464 /* Perform the move in the destination SUBREG's inner mode. */
4465 ops
[0] = SUBREG_REG (ops
[0]);
4466 mode
= GET_MODE (ops
[0]);
4467 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4468 gcc_assert (ops
[1]);
4471 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4473 rtx from
= SUBREG_REG (ops
[1]);
4474 machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4476 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4477 && GET_MODE_CLASS (imode
) == MODE_INT
4478 && subreg_lowpart_p (ops
[1]));
4480 if (GET_MODE_SIZE (imode
) < 4)
4482 if (imode
!= GET_MODE (from
))
4483 from
= gen_rtx_SUBREG (imode
, from
, 0);
4485 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4487 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4489 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4492 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4496 /* At least one of the operands needs to be a register. */
4497 if ((reload_in_progress
| reload_completed
) == 0
4498 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4500 rtx temp
= force_reg (mode
, ops
[1]);
4501 emit_move_insn (ops
[0], temp
);
4504 if (reload_in_progress
|| reload_completed
)
4506 if (CONSTANT_P (ops
[1]))
4507 return spu_split_immediate (ops
);
4511 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4513 if (GET_CODE (ops
[1]) == CONST_INT
)
4515 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4516 if (val
!= INTVAL (ops
[1]))
4518 emit_move_insn (ops
[0], GEN_INT (val
));
4524 if (MEM_ADDR_SPACE (ops
[0]))
4525 ops
[0] = expand_ea_mem (ops
[0], true);
4526 return spu_split_store (ops
);
4530 if (MEM_ADDR_SPACE (ops
[1]))
4531 ops
[1] = expand_ea_mem (ops
[1], false);
4532 return spu_split_load (ops
);
4539 spu_convert_move (rtx dst
, rtx src
)
4541 machine_mode mode
= GET_MODE (dst
);
4542 machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4544 gcc_assert (GET_MODE (src
) == TImode
);
4545 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4546 emit_insn (gen_rtx_SET (VOIDmode
, reg
,
4547 gen_rtx_TRUNCATE (int_mode
,
4548 gen_rtx_LSHIFTRT (TImode
, src
,
4549 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4550 if (int_mode
!= mode
)
4552 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4553 emit_move_insn (dst
, reg
);
4557 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4558 the address from SRC and SRC+16. Return a REG or CONST_INT that
4559 specifies how many bytes to rotate the loaded registers, plus any
4560 extra from EXTRA_ROTQBY. The address and rotate amounts are
4561 normalized to improve merging of loads and rotate computations. */
4563 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4565 rtx addr
= XEXP (src
, 0);
4566 rtx p0
, p1
, rot
, addr0
, addr1
;
4572 if (MEM_ALIGN (src
) >= 128)
4573 /* Address is already aligned; simply perform a TImode load. */ ;
4574 else if (GET_CODE (addr
) == PLUS
)
4577 aligned reg + aligned reg => lqx
4578 aligned reg + unaligned reg => lqx, rotqby
4579 aligned reg + aligned const => lqd
4580 aligned reg + unaligned const => lqd, rotqbyi
4581 unaligned reg + aligned reg => lqx, rotqby
4582 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4583 unaligned reg + aligned const => lqd, rotqby
4584 unaligned reg + unaligned const -> not allowed by legitimate address
4586 p0
= XEXP (addr
, 0);
4587 p1
= XEXP (addr
, 1);
4588 if (!reg_aligned_for_addr (p0
))
4590 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4592 rot
= gen_reg_rtx (SImode
);
4593 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4595 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4599 && INTVAL (p1
) * BITS_PER_UNIT
4600 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4602 rot
= gen_reg_rtx (SImode
);
4603 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4608 rtx x
= gen_reg_rtx (SImode
);
4609 emit_move_insn (x
, p1
);
4610 if (!spu_arith_operand (p1
, SImode
))
4612 rot
= gen_reg_rtx (SImode
);
4613 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4614 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4622 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4624 rot_amt
= INTVAL (p1
) & 15;
4625 if (INTVAL (p1
) & -16)
4627 p1
= GEN_INT (INTVAL (p1
) & -16);
4628 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4633 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4637 else if (REG_P (addr
))
4639 if (!reg_aligned_for_addr (addr
))
4642 else if (GET_CODE (addr
) == CONST
)
4644 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4645 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4646 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4648 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4650 addr
= gen_rtx_CONST (Pmode
,
4651 gen_rtx_PLUS (Pmode
,
4652 XEXP (XEXP (addr
, 0), 0),
4653 GEN_INT (rot_amt
& -16)));
4655 addr
= XEXP (XEXP (addr
, 0), 0);
4659 rot
= gen_reg_rtx (Pmode
);
4660 emit_move_insn (rot
, addr
);
4663 else if (GET_CODE (addr
) == CONST_INT
)
4665 rot_amt
= INTVAL (addr
);
4666 addr
= GEN_INT (rot_amt
& -16);
4668 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4670 rot
= gen_reg_rtx (Pmode
);
4671 emit_move_insn (rot
, addr
);
4674 rot_amt
+= extra_rotby
;
4680 rtx x
= gen_reg_rtx (SImode
);
4681 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4685 if (!rot
&& rot_amt
)
4686 rot
= GEN_INT (rot_amt
);
4688 addr0
= copy_rtx (addr
);
4689 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4690 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4694 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4695 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4696 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4703 spu_split_load (rtx
* ops
)
4705 machine_mode mode
= GET_MODE (ops
[0]);
4706 rtx addr
, load
, rot
;
4709 if (GET_MODE_SIZE (mode
) >= 16)
4712 addr
= XEXP (ops
[1], 0);
4713 gcc_assert (GET_CODE (addr
) != AND
);
4715 if (!address_needs_split (ops
[1]))
4717 ops
[1] = change_address (ops
[1], TImode
, addr
);
4718 load
= gen_reg_rtx (TImode
);
4719 emit_insn (gen__movti (load
, ops
[1]));
4720 spu_convert_move (ops
[0], load
);
4724 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4726 load
= gen_reg_rtx (TImode
);
4727 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4730 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4732 spu_convert_move (ops
[0], load
);
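
/* Illustration only (hypothetical helper, assuming the addressed bytes stay
   within one quadword): the split load above fetches the 16-byte-aligned
   quadword containing the data and then rotates it so the addressed bytes
   land in the preferred slot; for a 4-byte scalar that is roughly the
   following.  */
static unsigned int
example_split_load_u32 (const unsigned char *base, unsigned long addr)
{
  const unsigned char *quad = base + (addr & ~15UL);  /* lqd/lqx: aligned load */
  unsigned int off = addr & 15;                       /* rotqby amount */
  unsigned int val = 0;
  int i;

  for (i = 0; i < 4; i++)       /* bytes are big-endian within the register */
    val = (val << 8) | quad[off + i];
  return val;
}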
4737 spu_split_store (rtx
* ops
)
4739 machine_mode mode
= GET_MODE (ops
[0]);
4741 rtx addr
, p0
, p1
, p1_lo
, smem
;
4745 if (GET_MODE_SIZE (mode
) >= 16)
4748 addr
= XEXP (ops
[0], 0);
4749 gcc_assert (GET_CODE (addr
) != AND
);
4751 if (!address_needs_split (ops
[0]))
4753 reg
= gen_reg_rtx (TImode
);
4754 emit_insn (gen_spu_convert (reg
, ops
[1]));
4755 ops
[0] = change_address (ops
[0], TImode
, addr
);
4756 emit_move_insn (ops
[0], reg
);
4760 if (GET_CODE (addr
) == PLUS
)
      /* 8 cases:
	 aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
	 aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
	 aligned reg   + aligned const   => lqd, c?d, shuf, stqx
	 aligned reg   + unaligned const => lqd, c?d, shuf, stqx
	 unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
	 unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
	 unaligned reg + aligned const   => lqd, c?d, shuf, stqx
	 unaligned reg + unaligned const => lqx, c?d, shuf, stqx  */
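      /* In the table above, c?d and c?x stand for the generate-controls
	 instructions cbd/chd/cwd/cdd and cbx/chx/cwx/cdx; they build the
	 shufb mask used to merge the new value into the loaded quadword
	 before it is stored back.  */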
4773 p0
= XEXP (addr
, 0);
4774 p1
= p1_lo
= XEXP (addr
, 1);
4775 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4777 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4778 if (reg_aligned_for_addr (p0
))
4780 p1
= GEN_INT (INTVAL (p1
) & -16);
4781 if (p1
== const0_rtx
)
4784 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4788 rtx x
= gen_reg_rtx (SImode
);
4789 emit_move_insn (x
, p1
);
4790 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4794 else if (REG_P (addr
))
4798 p1
= p1_lo
= const0_rtx
;
4803 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4804 p1
= 0; /* aform doesn't use p1 */
4806 if (ALIGNED_SYMBOL_REF_P (addr
))
4808 else if (GET_CODE (addr
) == CONST
4809 && GET_CODE (XEXP (addr
, 0)) == PLUS
4810 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4811 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4813 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4815 addr
= gen_rtx_CONST (Pmode
,
4816 gen_rtx_PLUS (Pmode
,
4817 XEXP (XEXP (addr
, 0), 0),
4818 GEN_INT (v
& -16)));
4820 addr
= XEXP (XEXP (addr
, 0), 0);
4821 p1_lo
= GEN_INT (v
& 15);
4823 else if (GET_CODE (addr
) == CONST_INT
)
4825 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4826 addr
= GEN_INT (INTVAL (addr
) & -16);
4830 p1_lo
= gen_reg_rtx (SImode
);
4831 emit_move_insn (p1_lo
, addr
);
4835 gcc_assert (aform
== 0 || aform
== 1);
4836 reg
= gen_reg_rtx (TImode
);
4838 scalar
= store_with_one_insn_p (ops
[0]);
      /* We could copy the flags from the ops[0] MEM to mem here.  We don't,
	 because we want this load to be optimized away if possible, and
	 copying the flags would prevent that in certain cases, e.g.
	 consider the volatile flag.  */
4846 rtx pat
= gen_reg_rtx (TImode
);
4847 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4848 set_mem_alias_set (lmem
, 0);
4849 emit_insn (gen_movti (reg
, lmem
));
4851 if (!p0
|| reg_aligned_for_addr (p0
))
4852 p0
= stack_pointer_rtx
;
4856 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4857 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4861 if (GET_CODE (ops
[1]) == REG
)
4862 emit_insn (gen_spu_convert (reg
, ops
[1]));
4863 else if (GET_CODE (ops
[1]) == SUBREG
)
4864 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4869 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4870 emit_insn (gen_ashlti3
4871 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4873 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4874 /* We can't use the previous alias set because the memory has changed
4875 size and can potentially overlap objects of other types. */
4876 set_mem_alias_set (smem
, 0);
4878 emit_insn (gen_movti (smem
, reg
));
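  /* In short, the store built above is a read-modify-write of the
     containing quadword: load the 16-byte block, build an insertion mask
     with cpat, shufb the new value into the block, and write the whole
     block back.  */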
/* Return TRUE if X is a MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs.  */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode.  */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment, which means this field
     is padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded.  */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
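/* Illustrative example of a padded member:

       struct s { int x __attribute__ ((aligned (16))); };

   X is the last field and is 16-byte aligned, so the structure is padded
   out to 16 bytes and X can be accessed with a single quadword load or
   store without touching anything outside the structure.  */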
/* Parse the -mfixed-range= option string.  */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
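/* Example usage (illustrative): -mfixed-range=80-87 marks registers 80
   through 87 as fixed so the compiler never allocates them; several
   ranges may be given, separated by commas.  */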
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
	 of an SImode will always be all 1s, i.e., valid for fsmbi.  */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}
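/* fsmbi expands a 16-bit immediate into a 16-byte constant in which each
   byte is either 0x00 or 0xff (one immediate bit per byte), so only
   constants of that shape classify as IC_FSMBI.  IC_FSMBI2 constants need
   an fsmbi plus one more instruction and are only accepted here before
   epilogue_completed.  */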
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
int
cpat_const_p (rtx x, machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
5007 gen_cpat_const (rtx
* ops
)
5009 unsigned char dst
[16];
5010 int i
, offset
, shift
, isize
;
5011 if (GET_CODE (ops
[3]) != CONST_INT
5012 || GET_CODE (ops
[2]) != CONST_INT
5013 || (GET_CODE (ops
[1]) != CONST_INT
5014 && GET_CODE (ops
[1]) != REG
))
5016 if (GET_CODE (ops
[1]) == REG
5017 && (!REG_POINTER (ops
[1])
5018 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
5021 for (i
= 0; i
< 16; i
++)
5023 isize
= INTVAL (ops
[3]);
5026 else if (isize
== 2)
5030 offset
= (INTVAL (ops
[2]) +
5031 (GET_CODE (ops
[1]) ==
5032 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
5033 for (i
= 0; i
< isize
; i
++)
5034 dst
[offset
+ i
] = i
+ shift
;
5035 return array_to_constant (TImode
, dst
);
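  /* The pattern built above mirrors what cbd/chd/cwd/cdd produce: the
     default selector bytes 0x10..0x1f copy the original quadword through,
     and the bytes at the insertion offset are replaced by selectors for
     the new value, so a following shufb merges the scalar into the
     quadword.  */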
5038 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5039 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5040 than 16 bytes, the value is repeated across the rest of the array. */
5042 constant_to_array (machine_mode mode
, rtx x
, unsigned char arr
[16])
5047 memset (arr
, 0, 16);
5048 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5049 if (GET_CODE (x
) == CONST_INT
5050 || (GET_CODE (x
) == CONST_DOUBLE
5051 && (mode
== SFmode
|| mode
== DFmode
)))
5053 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5055 if (GET_CODE (x
) == CONST_DOUBLE
)
5056 val
= const_double_to_hwint (x
);
5059 first
= GET_MODE_SIZE (mode
) - 1;
5060 for (i
= first
; i
>= 0; i
--)
5062 arr
[i
] = val
& 0xff;
5065 /* Splat the constant across the whole array. */
5066 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5069 j
= (j
== first
) ? 0 : j
+ 1;
5072 else if (GET_CODE (x
) == CONST_DOUBLE
)
5074 val
= CONST_DOUBLE_LOW (x
);
5075 for (i
= 15; i
>= 8; i
--)
5077 arr
[i
] = val
& 0xff;
5080 val
= CONST_DOUBLE_HIGH (x
);
5081 for (i
= 7; i
>= 0; i
--)
5083 arr
[i
] = val
& 0xff;
5087 else if (GET_CODE (x
) == CONST_VECTOR
)
5091 mode
= GET_MODE_INNER (mode
);
5092 units
= CONST_VECTOR_NUNITS (x
);
5093 for (i
= 0; i
< units
; i
++)
5095 elt
= CONST_VECTOR_ELT (x
, i
);
5096 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5098 if (GET_CODE (elt
) == CONST_DOUBLE
)
5099 val
= const_double_to_hwint (elt
);
5102 first
= GET_MODE_SIZE (mode
) - 1;
5103 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5105 for (j
= first
; j
>= 0; j
--)
5107 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
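  /* For example (illustrative), constant_to_array (SImode,
     GEN_INT (0x12345678), arr) produces
     arr = { 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, ... }: most significant
     byte first, with the value repeated to fill all 16 bytes.  */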
5117 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5118 smaller than 16 bytes, use the bytes that would represent that value
5119 in a register, e.g., for QImode return the value of arr[3]. */
5121 array_to_constant (machine_mode mode
, const unsigned char arr
[16])
5123 machine_mode inner_mode
;
5125 int units
, size
, i
, j
, k
;
5128 if (GET_MODE_CLASS (mode
) == MODE_INT
5129 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5131 j
= GET_MODE_SIZE (mode
);
5132 i
= j
< 4 ? 4 - j
: 0;
5133 for (val
= 0; i
< j
; i
++)
5134 val
= (val
<< 8) | arr
[i
];
5135 val
= trunc_int_for_mode (val
, mode
);
5136 return GEN_INT (val
);
5142 for (i
= high
= 0; i
< 8; i
++)
5143 high
= (high
<< 8) | arr
[i
];
5144 for (i
= 8, val
= 0; i
< 16; i
++)
5145 val
= (val
<< 8) | arr
[i
];
5146 return immed_double_const (val
, high
, TImode
);
5150 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5151 val
= trunc_int_for_mode (val
, SImode
);
5152 return hwint_to_const_double (SFmode
, val
);
5156 for (i
= 0, val
= 0; i
< 8; i
++)
5157 val
= (val
<< 8) | arr
[i
];
5158 return hwint_to_const_double (DFmode
, val
);
5161 if (!VECTOR_MODE_P (mode
))
5164 units
= GET_MODE_NUNITS (mode
);
5165 size
= GET_MODE_UNIT_SIZE (mode
);
5166 inner_mode
= GET_MODE_INNER (mode
);
5167 v
= rtvec_alloc (units
);
5169 for (k
= i
= 0; i
< units
; ++i
)
5172 for (j
= 0; j
< size
; j
++, k
++)
5173 val
= (val
<< 8) | arr
[k
];
5175 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5176 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5178 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5183 return gen_rtx_CONST_VECTOR (mode
, v
);
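  /* The QImode example in the comment above reflects the SPU preferred
     slot: a scalar lives in bytes 0..3 of a register, with a QImode value
     right-justified in byte 3, which is why arr[3] is the byte that
     matters.  */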
5187 reloc_diagnostic (rtx x
)
5190 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5193 if (GET_CODE (x
) == SYMBOL_REF
)
5194 decl
= SYMBOL_REF_DECL (x
);
5195 else if (GET_CODE (x
) == CONST
5196 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5197 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5199 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5200 if (decl
&& !DECL_P (decl
))
5203 /* The decl could be a string constant. */
5204 if (decl
&& DECL_P (decl
))
5207 /* We use last_assemble_variable_decl to get line information. It's
5208 not always going to be right and might not even be close, but will
5209 be right for the more common cases. */
5210 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5211 loc
= DECL_SOURCE_LOCATION (decl
);
5213 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5215 if (TARGET_WARN_RELOC
)
5217 "creating run-time relocation for %qD", decl
);
5220 "creating run-time relocation for %qD", decl
);
5224 if (TARGET_WARN_RELOC
)
5225 warning_at (input_location
, 0, "creating run-time relocation");
5227 error_at (input_location
, "creating run-time relocation");
5231 /* Hook into assemble_integer so we can generate an error for run-time
5232 relocations. The SPU ABI disallows them. */
5234 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5236 /* By default run-time relocations aren't supported, but we allow them
5237 in case users support it in their own run-time loader. And we provide
5238 a warning for those users that don't. */
5239 if ((GET_CODE (x
) == SYMBOL_REF
)
5240 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5241 reloc_diagnostic (x
);
5243 return default_assemble_integer (x
, size
, aligned_p
);
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
5255 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
,
5256 int opno ATTRIBUTE_UNUSED
, int *total
,
5257 bool speed ATTRIBUTE_UNUSED
)
5259 machine_mode mode
= GET_MODE (x
);
5260 int cost
= COSTS_N_INSNS (2);
5262 /* Folding to a CONST_VECTOR will use extra space but there might
5263 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5264 only if it allows us to fold away multiple insns. Changing the cost
5265 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5266 because this cost will only be compared against a single insn.
5267 if (code == CONST_VECTOR)
5268 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5271 /* Use defaults for float operations. Not accurate but good enough. */
5274 *total
= COSTS_N_INSNS (13);
5279 *total
= COSTS_N_INSNS (6);
5285 if (satisfies_constraint_K (x
))
5287 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5288 *total
= COSTS_N_INSNS (1);
5290 *total
= COSTS_N_INSNS (3);
5294 *total
= COSTS_N_INSNS (3);
5299 *total
= COSTS_N_INSNS (0);
5303 *total
= COSTS_N_INSNS (5);
5307 case FLOAT_TRUNCATE
:
5309 case UNSIGNED_FLOAT
:
5312 *total
= COSTS_N_INSNS (7);
5318 *total
= COSTS_N_INSNS (9);
5325 GET_CODE (XEXP (x
, 0)) ==
5326 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5327 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5329 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5331 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5332 cost
= COSTS_N_INSNS (14);
5333 if ((val
& 0xffff) == 0)
5334 cost
= COSTS_N_INSNS (9);
5335 else if (val
> 0 && val
< 0x10000)
5336 cost
= COSTS_N_INSNS (11);
5345 *total
= COSTS_N_INSNS (20);
5352 *total
= COSTS_N_INSNS (4);
5355 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5356 *total
= COSTS_N_INSNS (0);
5358 *total
= COSTS_N_INSNS (4);
5361 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5362 if (GET_MODE_CLASS (mode
) == MODE_INT
5363 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5364 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5365 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5371 spu_unwind_word_mode (void)
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register).  */
5388 spu_allocate_stack (rtx op0
, rtx op1
)
5391 rtx chain
= gen_reg_rtx (V4SImode
);
5392 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5393 rtx sp
= gen_reg_rtx (V4SImode
);
5394 rtx splatted
= gen_reg_rtx (V4SImode
);
5395 rtx pat
= gen_reg_rtx (TImode
);
5397 /* copy the back chain so we can save it back again. */
5398 emit_move_insn (chain
, stack_bot
);
5400 op1
= force_reg (SImode
, op1
);
5402 v
= 0x1020300010203ll
;
5403 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5404 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5406 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5407 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5409 if (flag_stack_check
)
5411 rtx avail
= gen_reg_rtx(SImode
);
5412 rtx result
= gen_reg_rtx(SImode
);
5413 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5414 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5415 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5418 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5420 emit_move_insn (stack_bot
, chain
);
5422 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5426 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5428 static unsigned char arr
[16] =
5429 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5430 rtx temp
= gen_reg_rtx (SImode
);
5431 rtx temp2
= gen_reg_rtx (SImode
);
5432 rtx temp3
= gen_reg_rtx (V4SImode
);
5433 rtx temp4
= gen_reg_rtx (V4SImode
);
5434 rtx pat
= gen_reg_rtx (TImode
);
5435 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5437 /* Restore the backchain from the first word, sp from the second. */
5438 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5439 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5441 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5443 /* Compute Available Stack Size for sp */
5444 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5445 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5447 /* Compute Available Stack Size for back chain */
5448 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5449 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5450 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5452 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5453 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
rtx
spu_gen_subreg (machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}
5509 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5511 return (TYPE_MODE (type
) == BLKmode
5513 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5514 || int_size_in_bytes (type
) >
5515 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5518 /* Create the built-in types and functions */
5520 enum spu_function_code
5522 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5523 #include "spu-builtins.def"
5528 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5530 struct spu_builtin_description spu_builtins
[] = {
5531 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5532 {fcode, icode, name, type, params},
5533 #include "spu-builtins.def"
5537 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
/* Returns the spu builtin decl for CODE.  */
static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= NUM_SPU_BUILTINS)
    return error_mark_node;

  return spu_builtin_decls[code];
}
5552 spu_init_builtins (void)
5554 struct spu_builtin_description
*d
;
5557 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5558 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5559 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5560 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5561 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5562 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5564 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5565 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5566 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5567 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5569 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5571 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5572 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5573 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5574 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5575 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5576 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5577 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5578 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5579 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5580 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5581 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5582 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5584 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5585 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5586 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5587 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5588 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5589 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5590 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5591 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5593 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5594 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5596 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5598 spu_builtin_types
[SPU_BTI_PTR
] =
5599 build_pointer_type (build_qualified_type
5601 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5603 /* For each builtin we build a new prototype. The tree code will make
5604 sure nodes are shared. */
5605 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5608 char name
[64]; /* build_function will make a copy. */
5614 /* Find last parm. */
5615 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5620 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5622 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5624 sprintf (name
, "__builtin_%s", d
->name
);
5625 spu_builtin_decls
[i
] =
5626 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5627 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5628 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5630 /* These builtins don't throw. */
5631 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
5636 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5638 static unsigned char arr
[16] =
5639 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5641 rtx temp
= gen_reg_rtx (Pmode
);
5642 rtx temp2
= gen_reg_rtx (V4SImode
);
5643 rtx temp3
= gen_reg_rtx (V4SImode
);
5644 rtx pat
= gen_reg_rtx (TImode
);
5645 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5647 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5649 /* Restore the sp. */
5650 emit_move_insn (temp
, op1
);
5651 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5653 /* Compute available stack size for sp. */
5654 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5655 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5657 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5658 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
5668 spu_builtin_splats (rtx ops
[])
5670 machine_mode mode
= GET_MODE (ops
[0]);
5671 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5673 unsigned char arr
[16];
5674 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5675 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5679 rtx reg
= gen_reg_rtx (TImode
);
5681 if (GET_CODE (ops
[1]) != REG
5682 && GET_CODE (ops
[1]) != SUBREG
)
5683 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5689 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5695 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5700 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5705 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5711 emit_move_insn (reg
, shuf
);
5712 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
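  /* The shufb patterns above replicate the scalar across the vector:
     0x00010203 repeated four times copies the SImode preferred slot
     (bytes 0-3 of ops[1]) into every word of the result, while the
     halfword and byte variants repeat 0x0203 and 0x03 instead.  */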
5717 spu_builtin_extract (rtx ops
[])
5722 mode
= GET_MODE (ops
[1]);
5724 if (GET_CODE (ops
[2]) == CONST_INT
)
5729 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5732 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5735 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5738 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5741 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5744 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5752 from
= spu_gen_subreg (TImode
, ops
[1]);
5753 rot
= gen_reg_rtx (TImode
);
5754 tmp
= gen_reg_rtx (SImode
);
5759 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5762 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5763 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5767 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5771 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5776 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5778 emit_insn (gen_spu_convert (ops
[0], rot
));
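  /* For a variable element index, the code above turns the index into a
     byte rotation count (scaled by the element size and biased so the
     element lands in the preferred slot), rotates the whole quadword with
     rotqby, and then converts the preferred slot to the scalar mode.  */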
5782 spu_builtin_insert (rtx ops
[])
5784 machine_mode mode
= GET_MODE (ops
[0]);
5785 machine_mode imode
= GET_MODE_INNER (mode
);
5786 rtx mask
= gen_reg_rtx (TImode
);
5789 if (GET_CODE (ops
[3]) == CONST_INT
)
5790 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5793 offset
= gen_reg_rtx (SImode
);
5794 emit_insn (gen_mulsi3
5795 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5798 (mask
, stack_pointer_rtx
, offset
,
5799 GEN_INT (GET_MODE_SIZE (imode
))));
5800 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5804 spu_builtin_promote (rtx ops
[])
5806 machine_mode mode
, imode
;
5807 rtx rot
, from
, offset
;
5810 mode
= GET_MODE (ops
[0]);
5811 imode
= GET_MODE_INNER (mode
);
5813 from
= gen_reg_rtx (TImode
);
5814 rot
= spu_gen_subreg (TImode
, ops
[0]);
5816 emit_insn (gen_spu_convert (from
, ops
[1]));
5818 if (GET_CODE (ops
[2]) == CONST_INT
)
5820 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5821 if (GET_MODE_SIZE (imode
) < 4)
5822 pos
+= 4 - GET_MODE_SIZE (imode
);
5823 offset
= GEN_INT (pos
& 15);
5827 offset
= gen_reg_rtx (SImode
);
5831 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5834 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5835 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5839 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5840 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5844 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5850 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5854 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5856 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5857 rtx shuf
= gen_reg_rtx (V4SImode
);
5858 rtx insn
= gen_reg_rtx (V4SImode
);
5863 fnaddr
= force_reg (SImode
, fnaddr
);
5864 cxt
= force_reg (SImode
, cxt
);
5866 if (TARGET_LARGE_MEM
)
5868 rtx rotl
= gen_reg_rtx (V4SImode
);
5869 rtx mask
= gen_reg_rtx (V4SImode
);
5870 rtx bi
= gen_reg_rtx (SImode
);
5871 static unsigned char const shufa
[16] = {
5872 2, 3, 0, 1, 18, 19, 16, 17,
5873 0, 1, 2, 3, 16, 17, 18, 19
5875 static unsigned char const insna
[16] = {
5877 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5879 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5882 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5883 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5885 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5886 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5887 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5888 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5890 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5891 emit_move_insn (mem
, insn
);
5893 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5894 mem
= adjust_address (m_tramp
, Pmode
, 16);
5895 emit_move_insn (mem
, bi
);
5899 rtx scxt
= gen_reg_rtx (SImode
);
5900 rtx sfnaddr
= gen_reg_rtx (SImode
);
5901 static unsigned char const insna
[16] = {
5902 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5908 shufc
= gen_reg_rtx (TImode
);
5909 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5911 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5912 fits 18 bits and the last 4 are zeros. This will be true if
5913 the stack pointer is initialized to 0x3fff0 at program start,
5914 otherwise the ila instruction will be garbage. */
5916 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5917 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5919 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5920 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5921 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5923 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5924 emit_move_insn (mem
, insn
);
5926 emit_insn (gen_sync ());
static bool
spu_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !spu_naked_function_p (decl);
}
5938 spu_expand_sign_extend (rtx ops
[])
5940 unsigned char arr
[16];
5941 rtx pat
= gen_reg_rtx (TImode
);
5944 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5945 if (GET_MODE (ops
[1]) == QImode
)
5947 sign
= gen_reg_rtx (HImode
);
5948 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5949 for (i
= 0; i
< 16; i
++)
5955 for (i
= 0; i
< 16; i
++)
5957 switch (GET_MODE (ops
[1]))
5960 sign
= gen_reg_rtx (SImode
);
5961 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5963 arr
[last
- 1] = 0x02;
5966 sign
= gen_reg_rtx (SImode
);
5967 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5968 for (i
= 0; i
< 4; i
++)
5969 arr
[last
- i
] = 3 - i
;
5972 sign
= gen_reg_rtx (SImode
);
5973 c
= gen_reg_rtx (SImode
);
5974 emit_insn (gen_spu_convert (c
, ops
[1]));
5975 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5976 for (i
= 0; i
< 8; i
++)
5977 arr
[last
- i
] = 7 - i
;
5983 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5984 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5987 /* expand vector initialization. If there are any constant parts,
5988 load constant parts first. Then load any non-constant parts. */
5990 spu_expand_vector_init (rtx target
, rtx vals
)
5992 machine_mode mode
= GET_MODE (target
);
5993 int n_elts
= GET_MODE_NUNITS (mode
);
5995 bool all_same
= true;
5996 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5999 first
= XVECEXP (vals
, 0, 0);
6000 for (i
= 0; i
< n_elts
; ++i
)
6002 x
= XVECEXP (vals
, 0, i
);
6003 if (!(CONST_INT_P (x
)
6004 || GET_CODE (x
) == CONST_DOUBLE
6005 || GET_CODE (x
) == CONST_FIXED
))
6009 if (first_constant
== NULL_RTX
)
6012 if (i
> 0 && !rtx_equal_p (x
, first
))
6016 /* if all elements are the same, use splats to repeat elements */
6019 if (!CONSTANT_P (first
)
6020 && !register_operand (first
, GET_MODE (x
)))
6021 first
= force_reg (GET_MODE (first
), first
);
6022 emit_insn (gen_spu_splats (target
, first
));
6026 /* load constant parts */
6027 if (n_var
!= n_elts
)
6031 emit_move_insn (target
,
6032 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6036 rtx constant_parts_rtx
= copy_rtx (vals
);
6038 gcc_assert (first_constant
!= NULL_RTX
);
6039 /* fill empty slots with the first constant, this increases
6040 our chance of using splats in the recursive call below. */
6041 for (i
= 0; i
< n_elts
; ++i
)
6043 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6044 if (!(CONST_INT_P (x
)
6045 || GET_CODE (x
) == CONST_DOUBLE
6046 || GET_CODE (x
) == CONST_FIXED
))
6047 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6050 spu_expand_vector_init (target
, constant_parts_rtx
);
6054 /* load variable parts */
6057 rtx insert_operands
[4];
6059 insert_operands
[0] = target
;
6060 insert_operands
[2] = target
;
6061 for (i
= 0; i
< n_elts
; ++i
)
6063 x
= XVECEXP (vals
, 0, i
);
6064 if (!(CONST_INT_P (x
)
6065 || GET_CODE (x
) == CONST_DOUBLE
6066 || GET_CODE (x
) == CONST_FIXED
))
6068 if (!register_operand (x
, GET_MODE (x
)))
6069 x
= force_reg (GET_MODE (x
), x
);
6070 insert_operands
[1] = x
;
6071 insert_operands
[3] = GEN_INT (i
);
6072 spu_builtin_insert (insert_operands
);
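  /* Illustrative example: for a V4SImode initializer { x, 1, 2, 3 } with X
     in a register, the constant vector { 1, 1, 2, 3 } is loaded first (the
     variable slot is temporarily filled with the first constant so the
     recursive call may still use splats), and X is then inserted into
     element 0 with a shufb-based insert.  */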
6078 /* Return insn index for the vector compare instruction for given CODE,
6079 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6082 get_vec_cmp_insn (enum rtx_code code
,
6083 machine_mode dest_mode
,
6084 machine_mode op_mode
)
6090 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6091 return CODE_FOR_ceq_v16qi
;
6092 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6093 return CODE_FOR_ceq_v8hi
;
6094 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6095 return CODE_FOR_ceq_v4si
;
6096 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6097 return CODE_FOR_ceq_v4sf
;
6098 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6099 return CODE_FOR_ceq_v2df
;
6102 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6103 return CODE_FOR_cgt_v16qi
;
6104 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6105 return CODE_FOR_cgt_v8hi
;
6106 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6107 return CODE_FOR_cgt_v4si
;
6108 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6109 return CODE_FOR_cgt_v4sf
;
6110 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6111 return CODE_FOR_cgt_v2df
;
6114 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6115 return CODE_FOR_clgt_v16qi
;
6116 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6117 return CODE_FOR_clgt_v8hi
;
6118 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6119 return CODE_FOR_clgt_v4si
;
6127 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6128 DMODE is expected destination mode. This is a recursive function. */
6131 spu_emit_vector_compare (enum rtx_code rcode
,
6137 machine_mode dest_mode
;
6138 machine_mode op_mode
= GET_MODE (op1
);
6140 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
  /* Floating point vector compare instructions use destination mode
     V4SImode, and double floating point vector compare instructions use
     destination mode V2DImode.  Move the destination to the appropriate
     mode later.  */
6145 if (dmode
== V4SFmode
)
6146 dest_mode
= V4SImode
;
6147 else if (dmode
== V2DFmode
)
6148 dest_mode
= V2DImode
;
6152 mask
= gen_reg_rtx (dest_mode
);
6153 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6155 if (vec_cmp_insn
== -1)
6157 bool swap_operands
= false;
6158 bool try_again
= false;
6163 swap_operands
= true;
6168 swap_operands
= true;
6178 /* Treat A != B as ~(A==B). */
6180 enum rtx_code rev_code
;
6181 enum insn_code nor_code
;
6184 rev_code
= reverse_condition_maybe_unordered (rcode
);
6185 rev_mask
= spu_emit_vector_compare (rev_code
, op0
, op1
, dest_mode
);
6187 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6188 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6189 emit_insn (GEN_FCN (nor_code
) (mask
, rev_mask
));
6190 if (dmode
!= dest_mode
)
6192 rtx temp
= gen_reg_rtx (dest_mode
);
6193 convert_move (temp
, mask
, 0);
6203 /* Try GT/GTU/LT/LTU OR EQ */
6206 enum insn_code ior_code
;
6207 enum rtx_code new_code
;
6211 case GE
: new_code
= GT
; break;
6212 case GEU
: new_code
= GTU
; break;
6213 case LE
: new_code
= LT
; break;
6214 case LEU
: new_code
= LTU
; break;
6219 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6220 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6222 ior_code
= optab_handler (ior_optab
, dest_mode
);
6223 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6224 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6225 if (dmode
!= dest_mode
)
6227 rtx temp
= gen_reg_rtx (dest_mode
);
6228 convert_move (temp
, mask
, 0);
6238 enum insn_code ior_code
;
6240 lt_rtx
= spu_emit_vector_compare (LT
, op0
, op1
, dest_mode
);
6241 gt_rtx
= spu_emit_vector_compare (GT
, op0
, op1
, dest_mode
);
6243 ior_code
= optab_handler (ior_optab
, dest_mode
);
6244 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6245 emit_insn (GEN_FCN (ior_code
) (mask
, lt_rtx
, gt_rtx
));
6246 if (dmode
!= dest_mode
)
6248 rtx temp
= gen_reg_rtx (dest_mode
);
6249 convert_move (temp
, mask
, 0);
6256 /* Implement as (A==A) & (B==B) */
6259 enum insn_code and_code
;
6261 a_rtx
= spu_emit_vector_compare (EQ
, op0
, op0
, dest_mode
);
6262 b_rtx
= spu_emit_vector_compare (EQ
, op1
, op1
, dest_mode
);
6264 and_code
= optab_handler (and_optab
, dest_mode
);
6265 gcc_assert (and_code
!= CODE_FOR_nothing
);
6266 emit_insn (GEN_FCN (and_code
) (mask
, a_rtx
, b_rtx
));
6267 if (dmode
!= dest_mode
)
6269 rtx temp
= gen_reg_rtx (dest_mode
);
6270 convert_move (temp
, mask
, 0);
6280 /* You only get two chances. */
6282 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6284 gcc_assert (vec_cmp_insn
!= -1);
6295 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6296 if (dmode
!= dest_mode
)
6298 rtx temp
= gen_reg_rtx (dest_mode
);
6299 convert_move (temp
, mask
, 0);
6306 /* Emit vector conditional expression.
6307 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6308 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6311 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6312 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6314 machine_mode dest_mode
= GET_MODE (dest
);
6315 enum rtx_code rcode
= GET_CODE (cond
);
6318 /* Get the vector mask for the given relational operations. */
6319 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6321 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
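  /* selb computes dest = (op2 & ~mask) | (op1 & mask); since MASK is all
     ones in the elements where the comparison holds, this is the
     element-wise equivalent of dest = cond ? op1 : op2.  */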
6327 spu_force_reg (machine_mode mode
, rtx op
)
6330 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6332 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6333 || GET_MODE (op
) == BLKmode
)
6334 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6338 r
= force_reg (GET_MODE (op
), op
);
6339 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6341 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6346 x
= gen_reg_rtx (mode
);
6347 emit_insn (gen_spu_convert (x
, r
));
6352 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6354 HOST_WIDE_INT v
= 0;
6356 /* Check the range of immediate operands. */
6357 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6359 int range
= p
- SPU_BTI_7
;
6361 if (!CONSTANT_P (op
))
6362 error ("%s expects an integer literal in the range [%d, %d]",
6364 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6366 if (GET_CODE (op
) == CONST
6367 && (GET_CODE (XEXP (op
, 0)) == PLUS
6368 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6370 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6371 op
= XEXP (XEXP (op
, 0), 0);
6373 else if (GET_CODE (op
) == CONST_INT
)
6375 else if (GET_CODE (op
) == CONST_VECTOR
6376 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6377 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6379 /* The default for v is 0 which is valid in every range. */
6380 if (v
< spu_builtin_range
[range
].low
6381 || v
> spu_builtin_range
[range
].high
)
6382 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6384 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6393 /* This is only used in lqa, and stqa. Even though the insns
6394 encode 16 bits of the address (all but the 2 least
6395 significant), only 14 bits are used because it is masked to
6396 be 16 byte aligned. */
6400 /* This is used for lqr and stqr. */
6407 if (GET_CODE (op
) == LABEL_REF
6408 || (GET_CODE (op
) == SYMBOL_REF
6409 && SYMBOL_REF_FUNCTION_P (op
))
6410 || (v
& ((1 << lsbits
) - 1)) != 0)
6411 warning (0, "%d least significant bits of %s are ignored", lsbits
,
6418 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6419 rtx target
, rtx ops
[])
6421 enum insn_code icode
= (enum insn_code
) d
->icode
;
6424 /* Expand the arguments into rtl. */
6426 if (d
->parm
[0] != SPU_BTI_VOID
)
6429 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6431 tree arg
= CALL_EXPR_ARG (exp
, a
);
6434 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6437 gcc_assert (i
== insn_data
[icode
].n_generator_args
);
6442 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6443 tree exp
, rtx target
)
6447 enum insn_code icode
= (enum insn_code
) d
->icode
;
6448 machine_mode mode
, tmode
;
6453 /* Set up ops[] with values from arglist. */
6454 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6456 /* Handle the target operand which must be operand 0. */
6458 if (d
->parm
[0] != SPU_BTI_VOID
)
6461 /* We prefer the mode specified for the match_operand otherwise
6462 use the mode from the builtin function prototype. */
6463 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6464 if (tmode
== VOIDmode
)
6465 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
      /* Try to use target because not using it can lead to extra copies,
	 and when we are using all of the registers extra copies lead to
	 extra spills.  */
6470 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6473 target
= ops
[0] = gen_reg_rtx (tmode
);
6475 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6481 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6483 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6488 arg
= CALL_EXPR_ARG (exp
, 0);
6489 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6490 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6491 addr
= memory_address (mode
, op
);
6494 op
= gen_reg_rtx (GET_MODE (addr
));
6495 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6496 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6497 op
= gen_rtx_MEM (mode
, op
);
6499 pat
= GEN_FCN (icode
) (target
, op
);
  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;
6511 /* Handle the rest of the operands. */
6512 for (p
= 1; i
< n_operands
; i
++, p
++)
6514 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6515 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6517 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6519 /* mode can be VOIDmode here for labels */
6521 /* For specific intrinsics with an immediate operand, e.g.,
6522 si_ai(), we sometimes need to convert the scalar argument to a
6523 vector argument by splatting the scalar. */
6524 if (VECTOR_MODE_P (mode
)
6525 && (GET_CODE (ops
[i
]) == CONST_INT
6526 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6527 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6529 if (GET_CODE (ops
[i
]) == CONST_INT
)
6530 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6533 rtx reg
= gen_reg_rtx (mode
);
6534 machine_mode imode
= GET_MODE_INNER (mode
);
6535 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6536 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6537 if (imode
!= GET_MODE (ops
[i
]))
6538 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6539 TYPE_UNSIGNED (spu_builtin_types
6541 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6546 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6548 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6549 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6555 pat
= GEN_FCN (icode
) (0);
6558 pat
= GEN_FCN (icode
) (ops
[0]);
6561 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6564 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6567 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6570 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6573 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6582 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6583 emit_call_insn (pat
);
6584 else if (d
->type
== B_JUMP
)
6586 emit_jump_insn (pat
);
6592 return_type
= spu_builtin_types
[d
->parm
[0]];
6593 if (d
->parm
[0] != SPU_BTI_VOID
6594 && GET_MODE (target
) != TYPE_MODE (return_type
))
6596 /* target is the return value. It should always be the mode of
6597 the builtin function prototype. */
6598 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6605 spu_expand_builtin (tree exp
,
6607 rtx subtarget ATTRIBUTE_UNUSED
,
6608 machine_mode mode ATTRIBUTE_UNUSED
,
6609 int ignore ATTRIBUTE_UNUSED
)
6611 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6612 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
6613 struct spu_builtin_description
*d
;
6615 if (fcode
< NUM_SPU_BUILTINS
)
6617 d
= &spu_builtins
[fcode
];
6619 return spu_expand_builtin_1 (d
, exp
, target
);
6624 /* Implement targetm.vectorize.builtin_mask_for_load. */
6626 spu_builtin_mask_for_load (void)
6628 return spu_builtin_decls
[SPU_MASK_FOR_LOAD
];
6631 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6633 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6635 int misalign ATTRIBUTE_UNUSED
)
6639 switch (type_of_cost
)
6647 case cond_branch_not_taken
:
6649 case vec_promote_demote
:
6656 /* Load + rotate. */
6659 case unaligned_load
:
6662 case cond_branch_taken
:
6666 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6667 return elements
/ 2 + 1;
6674 /* Implement targetm.vectorize.init_cost. */
6677 spu_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
6679 unsigned *cost
= XNEWVEC (unsigned, 3);
6680 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
6684 /* Implement targetm.vectorize.add_stmt_cost. */
6687 spu_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6688 struct _stmt_vec_info
*stmt_info
, int misalign
,
6689 enum vect_cost_model_location where
)
6691 unsigned *cost
= (unsigned *) data
;
6692 unsigned retval
= 0;
6694 if (flag_vect_cost_model
)
6696 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6697 int stmt_cost
= spu_builtin_vectorization_cost (kind
, vectype
, misalign
);
6699 /* Statements in an inner loop relative to the loop being
6700 vectorized are weighted more heavily. The value here is
6701 arbitrary and could potentially be improved with analysis. */
6702 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6703 count
*= 50; /* FIXME. */
6705 retval
= (unsigned) (count
* stmt_cost
);
6706 cost
[where
] += retval
;
6712 /* Implement targetm.vectorize.finish_cost. */
6715 spu_finish_cost (void *data
, unsigned *prologue_cost
,
6716 unsigned *body_cost
, unsigned *epilogue_cost
)
6718 unsigned *cost
= (unsigned *) data
;
6719 *prologue_cost
= cost
[vect_prologue
];
6720 *body_cost
= cost
[vect_body
];
6721 *epilogue_cost
= cost
[vect_epilogue
];
6724 /* Implement targetm.vectorize.destroy_cost_data. */
6727 spu_destroy_cost_data (void *data
)
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}
6746 /* Return the appropriate mode for a named address pointer. */
6748 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6752 case ADDR_SPACE_GENERIC
:
6761 /* Return the appropriate mode for a named address address. */
6763 spu_addr_space_address_mode (addr_space_t addrspace
)
6767 case ADDR_SPACE_GENERIC
:
6776 /* Determine if one named address space is a subset of another. */
6779 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
6781 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6782 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6784 if (subset
== superset
)
6787 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6788 being subsets but instead as disjoint address spaces. */
6789 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6793 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
6796 /* Convert from one address space to another. */
6798 spu_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
6800 addr_space_t from_as
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
6801 addr_space_t to_as
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
6803 gcc_assert (from_as
== ADDR_SPACE_GENERIC
|| from_as
== ADDR_SPACE_EA
);
6804 gcc_assert (to_as
== ADDR_SPACE_GENERIC
|| to_as
== ADDR_SPACE_EA
);
6806 if (to_as
== ADDR_SPACE_GENERIC
&& from_as
== ADDR_SPACE_EA
)
6810 ls
= gen_const_mem (DImode
,
6811 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6812 set_mem_align (ls
, 128);
6814 result
= gen_reg_rtx (Pmode
);
6815 ls
= force_reg (Pmode
, convert_modes (Pmode
, DImode
, ls
, 1));
6816 op
= force_reg (Pmode
, convert_modes (Pmode
, EAmode
, op
, 1));
6817 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6818 ls
, const0_rtx
, Pmode
, 1);
6820 emit_insn (gen_subsi3 (result
, op
, ls
));
6825 else if (to_as
== ADDR_SPACE_EA
&& from_as
== ADDR_SPACE_GENERIC
)
6829 ls
= gen_const_mem (DImode
,
6830 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6831 set_mem_align (ls
, 128);
6833 result
= gen_reg_rtx (EAmode
);
6834 ls
= force_reg (EAmode
, convert_modes (EAmode
, DImode
, ls
, 1));
6835 op
= force_reg (Pmode
, op
);
6836 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6837 ls
, const0_rtx
, EAmode
, 1);
6838 op
= force_reg (EAmode
, convert_modes (EAmode
, Pmode
, op
, 1));
6840 if (EAmode
== SImode
)
6841 emit_insn (gen_addsi3 (result
, op
, ls
));
6843 emit_insn (gen_adddi3 (result
, op
, ls
));
6853 /* Count the total number of instructions in each pipe and return the
6854 maximum, which is used as the Minimum Iteration Interval (MII)
6855 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6856 -2 are instructions that can go in pipe0 or pipe1. */
6858 spu_sms_res_mii (struct ddg
*g
)
6861 unsigned t
[4] = {0, 0, 0, 0};
6863 for (i
= 0; i
< g
->num_nodes
; i
++)
6865 rtx_insn
*insn
= g
->nodes
[i
].insn
;
6866 int p
= get_pipe (insn
) + 2;
6868 gcc_assert (p
>= 0);
6872 if (dump_file
&& INSN_P (insn
))
6873 fprintf (dump_file
, "i%d %s %d %d\n",
6875 insn_data
[INSN_CODE(insn
)].name
,
6879 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6881 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
6886 spu_init_expanders (void)
6891 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6892 frame_pointer_needed is true. We don't know that until we're
6893 expanding the prologue. */
6894 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6896 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6897 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6898 to be treated as aligned, so generate them here. */
6899 r0
= gen_reg_rtx (SImode
);
6900 r1
= gen_reg_rtx (SImode
);
6901 mark_reg_pointer (r0
, 128);
6902 mark_reg_pointer (r1
, 128);
6903 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6904 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
static machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU, word mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU, word mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}
/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}
6938 /* Implement targetm.select_section. */
6940 spu_select_section (tree decl
, int reloc
, unsigned HOST_WIDE_INT align
)
6942 /* Variables and constants defined in the __ea address space
6943 go into a special section named "._ea". */
6944 if (TREE_TYPE (decl
) != error_mark_node
6945 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) == ADDR_SPACE_EA
)
6947 /* We might get called with string constants, but get_named_section
6948 doesn't like them as they are not DECLs. Also, we need to set
6949 flags in that case. */
6951 return get_section ("._ea", SECTION_WRITE
| SECTION_DEBUG
, NULL
);
6953 return get_named_section (decl
, "._ea", reloc
);
6956 return default_elf_select_section (decl
, reloc
, align
);
/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}
6972 /* Generate a constant or register which contains 2^SCALE. We assume
6973 the result is valid for MODE. Currently, MODE must be V4SFmode and
6974 SCALE must be SImode. */
6976 spu_gen_exp2 (machine_mode mode
, rtx scale
)
6978 gcc_assert (mode
== V4SFmode
);
6979 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6980 if (GET_CODE (scale
) != CONST_INT
)
6982 /* unsigned int exp = (127 + scale) << 23;
6983 __vector float m = (__vector float) spu_splats (exp); */
6984 rtx reg
= force_reg (SImode
, scale
);
6985 rtx exp
= gen_reg_rtx (SImode
);
6986 rtx mul
= gen_reg_rtx (mode
);
6987 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6988 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6989 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6994 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6995 unsigned char arr
[16];
6996 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6997 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6998 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6999 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
7000 return array_to_constant (mode
, arr
);
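  /* For a constant SCALE the array above encodes, in each word, the IEEE
     single-precision value with exponent 127 + SCALE and a zero mantissa,
     which is exactly 2**SCALE; e.g. SCALE = 3 yields
     { 8.0f, 8.0f, 8.0f, 8.0f }.  */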
/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}
void
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
}
7027 /* Implement targetm.ref_may_alias_errno. */
7029 spu_ref_may_alias_errno (ao_ref
*ref
)
7031 tree base
= ao_ref_base (ref
);
7033 /* With SPU newlib, errno is defined as something like
7035 The default implementation of this target macro does not
7036 recognize such expressions, so special-code for it here. */
7038 if (TREE_CODE (base
) == VAR_DECL
7039 && !TREE_STATIC (base
)
7040 && DECL_EXTERNAL (base
)
7041 && TREE_CODE (TREE_TYPE (base
)) == RECORD_TYPE
7042 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base
)),
7043 "_impure_data") == 0
7044 /* _errno is the first member of _impure_data. */
7045 && ref
->offset
== 0)
7048 return default_ref_may_alias_errno (ref
);
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
	output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
	{
	  output_asm_insn ("il\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%3", op);
	  output_asm_insn ("iohl\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
	output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
	{
	  output_asm_insn ("il\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%5", op);
	  output_asm_insn ("iohl\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
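
/* Illustrative output (an assumption for one concrete case, not taken from
   the original source): with DELTA = 8, VCALL_OFFSET = 0 and the 'this'
   pointer in the first argument register (assumed to be $3 here), the
   thunk body reduces to

       ai	$3,$3,8
       br	_ZN1B1fEv

   i.e. a single add-immediate to adjust 'this', followed by a direct
   branch to the target function (the mangled name is a made-up example).  */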
/* Canonicalize a comparison from one we don't have to one we do have.  */
static void
spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  if (!op0_preserve_value
      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
    {
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }
}
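
/* Example (added for clarity): a comparison such as (lt:SI a b) is
   rewritten by the hook above into (gt:SI b a) -- the operands are swapped
   and swap_condition maps LT to GT -- since the SPU instruction set
   provides equal and greater-than compares but no less-than forms.  */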
/*  Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,             0, 0, false, false, false, NULL, false }
};
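
/* Usage sketch (added for clarity; the names and exact spelling are
   illustrative assumptions, not taken from the original source):

	void isr (void) __attribute__ ((naked));
	typedef int vec_int __attribute__ ((spu_vector));

   "naked" may only appear on function declarations (decl_req is true in
   the table above), while "spu_vector" applies to types (type_req is
   true), matching the flags in the corresponding entries.  */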
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
struct gcc_target targetm = TARGET_INITIALIZER;