(define_expand "mov<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"
/* Force the operand into a register if it is not an
immediate whose use can be replaced with xzr.
(define_expand "movmisalign<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD && !STRICT_ALIGNMENT"
+ "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
/* This pattern is not permitted to fail during expansion: if both arguments
are non-registers (e.g. memory := constant, which can be created by the
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, m, m, w, ?r, ?w, ?r, w")
+ "=w, m, m, w, ?r, ?w, ?r, w, w")
(match_operand:VDMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
case 0: return "ldr\t%d0, %1";
case 1: return "str\txzr, %0";
case 2: return "str\t%d1, %0";
- case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
- case 4: return "umov\t%0, %1.d[0]";
+ case 3:
+ if (TARGET_SIMD)
+ return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+ return "fmov\t%d0, %d1";
+ case 4:
+ if (TARGET_SIMD)
+ return "umov\t%0, %1.d[0]";
+ return "fmov\t%x0, %d1";
case 5: return "fmov\t%d0, %1";
case 6: return "mov\t%0, %1";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 64);
+ case 8: return "fmov\t%d0, xzr";
default: gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
neon_logic<q>, neon_to_gp<q>, f_mcr,\
- mov_reg, neon_move<q>")]
+ mov_reg, neon_move<q>, f_mcr")
+ (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
)
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w")
+ "=w, Umn, m, w, ?r, ?w, ?r, w, w")
(match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
return "#";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 128);
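+ /* Writes to the D register implicitly zero the upper 64 bits,
+ so a single FMOV from XZR clears the whole vector. */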
+ case 8:
+ return "fmov\t%d0, xzr";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>")
- (set_attr "length" "4,4,4,4,8,8,8,4")]
+ multiple, neon_move<q>, fmov")
+ (set_attr "length" "4,4,4,4,8,8,8,4,4")
+ (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
)
;; When storing lane zero we can use the normal STR and its more permissive
(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:DREG2 2 "register_operand" "=w")
(match_operand:DREG2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
(match_operand:DREG 1 "register_operand" "w"))
(set (match_operand:DREG2 2 "memory_operand" "=m")
(match_operand:DREG2 3 "register_operand" "w"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:VQ2 2 "register_operand" "=w")
(match_operand:VQ2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
(match_operand:VQ 1 "register_operand" "w"))
(set (match_operand:VQ2 2 "memory_operand" "=m")
(match_operand:VQ2 3 "register_operand" "w"))]
- "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (<VQ:MODE>mode)))"
+ "TARGET_FLOAT
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<VQ:MODE>mode)))"
"stp\\t%q1, %q3, %z0"
[(set_attr "type" "neon_stp_q")]
)
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ (match_operand:VQMOV 1 "register_operand" ""))]
+ "TARGET_FLOAT
+ && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
[(const_int 0)]
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
(match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ "TARGET_FLOAT
+ && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
[(const_int 0)]
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx dst = operands[0];
rtx src = operands[1];
(vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand")
(match_operand 2 "ascending_int_parallel")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
+ {
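+ /* Selecting the low half of the lanes is just a lowpart move. */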
+ if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
+ {
+ emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
+ DONE;
+ }
+ }
)
(define_expand "aarch64_get_low<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
(define_expand "aarch64_get_high<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
)
(define_insn "aarch64_simd_mov_from_<mode>high"
- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
(vec_select:<VHALF>
- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
- dup\\t%d0, %1.d[1]
- umov\t%0, %1.d[1]"
- [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
+ dup\t%d0, %1.d[1]
+ umov\t%0, %1.d[1]
+ fmov\t%0, %1.d[1]"
+ [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
+ (set_attr "arch" "simd,simd,*")
(set_attr "length" "4")]
)
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
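+;; Extract the high 64-bit half of a 128-bit two-element vector into a
+;; general register using FMOV from the D[1] lane, which only needs
+;; TARGET_FLOAT, not TARGET_SIMD.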
+(define_insn "*aarch64_get_high<mode>"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
+ (vec_select:<VEL>
+ (match_operand:VQ_2E 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand")])))]
+ "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
+ "fmov\t%0, %1.d[1]"
+ [(set_attr "type" "f_mrc")]
+)
+
(define_insn "load_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "memory_operand" "Utq")
(match_operand:VDCSIF 2 "memory_operand" "m")))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
"ldr\\t%<single_dtype>0, %1"
[(set_attr "type" "neon_load1_1reg<dblq>")]
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "register_operand" "w, r")
(match_operand:VDCSIF 2 "register_operand" "w, r")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
;; the register alternatives either don't accept or themselves disparage.
(define_insn "*aarch64_combine_internal<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
+ "TARGET_FLOAT
&& !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
+ neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
(define_insn "*aarch64_combine_internal_be<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
+ "TARGET_FLOAT
&& BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>2, %<single_type>1, %y0
stp\t%<single_wx>2, %<single_wx>1, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
(define_insn "*aarch64_combinez_be<mode>"
(vec_concat:<VDBL>
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
;; Form a vector whose first half (in array order) comes from operand 1
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "general_operand")
(match_operand:VDCSIF 2 "general_operand")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
}
else
{
- /* Use *aarch64_combine_general<mode>. */
+ /* Use *aarch64_combine_internal<mode>. */
operands[lo] = force_reg (<MODE>mode, operands[lo]);
if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
{
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "general_operand")
(match_operand:VDC 2 "general_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (BYTES_BIG_ENDIAN)
std::swap (operands[1], operands[2]);
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_QD 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
(match_operand:VSTRUCT 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_movoi"
[(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
(match_operand:OI 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], OImode)
|| register_operand (operands[1], OImode))"
"@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_movci"
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:CI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], CImode)
|| register_operand (operands[1], CImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "12,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "16,8,8")]
)
(define_insn "*aarch64_be_movxi"
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:XI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "16,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "16,8,8")]
)
(define_split
[(set (match_operand:VSTRUCT_2QD 0 "register_operand")
(match_operand:VSTRUCT_2QD 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
(define_split
[(set (match_operand:OI 0 "register_operand")
(match_operand:OI 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
(define_split
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_3QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
(define_split
[(set (match_operand:CI 0 "nonimmediate_operand")
(match_operand:CI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], CImode)
aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
simplify_gen_subreg (OImode, operands[1], CImode, 0));
(define_split
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_4QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
(define_split
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], XImode)
aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
simplify_gen_subreg (OImode, operands[1], XImode, 0));
case E_OImode:
case E_CImode:
case E_XImode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* Structures of 64-bit Advanced SIMD vectors. */
case E_V2x8QImode:
case E_V4x4HFmode:
case E_V4x2SFmode:
case E_V4x1DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
/* Structures of 128-bit Advanced SIMD vectors. */
case E_V2x16QImode:
case E_V4x8HFmode:
case E_V4x4SFmode:
case E_V4x2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* 64-bit Advanced SIMD vectors. */
case E_V8QImode:
case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD : 0;
default:
return 0;
}
/* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
- if ((vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD
+ && (vec_flags & VEC_ADVSIMD)
&& known_eq (nunits, 0U)
&& known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
&& maybe_ge (GET_MODE_BITSIZE (element_mode)
if (GP_REGNUM_P (regno))
{
- if (vec_flags & VEC_ANY_SVE)
+ if (vec_flags & (VEC_ANY_SVE | VEC_STRUCT))
return false;
if (known_le (GET_MODE_SIZE (mode), 8))
return true;
|| mode == TImode
|| mode == TFmode
|| mode == TDmode
- || (BYTES_BIG_ENDIAN && advsimd_struct_p));
+ || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ && advsimd_struct_p));
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
mode of the corresponding addressing mode is half of that. */
/* On LE, for AdvSIMD, don't support anything other than POST_INC or
REG addressing. */
if (advsimd_struct_p
+ && TARGET_SIMD
&& !BYTES_BIG_ENDIAN
&& (code != POST_INC && code != REG))
return false;
&& aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
/* A 7bit offset check because OImode will emit a ldp/stp
- instruction (only big endian will get here).
+ instruction (only !TARGET_SIMD or big endian will get here).
For ldp/stp instructions, the offset is scaled for the size of a
single element of the pair. */
if (aarch64_advsimd_partial_struct_mode_p (mode)
return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
/* Three 9/12 bit offsets checks because CImode will emit three
- ldr/str instructions (only big endian will get here). */
+ ldr/str instructions (only !TARGET_SIMD or big endian will
+ get here). */
if (aarch64_advsimd_partial_struct_mode_p (mode)
&& known_eq (GET_MODE_SIZE (mode), 24))
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
/* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
LDR and STR. See the comment at the head of aarch64-sve.md for
more details about the big-endian handling. */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (reg_class_subset_p (rclass, FP_REGS)
&& !((REG_P (x) && HARD_REGISTER_P (x))
|| aarch64_simd_valid_immediate (x, NULL))
- && mode != VNx16QImode)
+ && mode != VNx16QImode
+ && (vec_flags & VEC_SVE_DATA)
+ && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
{
- unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if ((vec_flags & VEC_SVE_DATA)
- && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
- {
- sri->icode = CODE_FOR_aarch64_sve_reload_mem;
- return NO_REGS;
- }
+ sri->icode = CODE_FOR_aarch64_sve_reload_mem;
+ return NO_REGS;
}
/* If we have to disable direct literal pool loads and stores because the
/* Without the TARGET_SIMD instructions we cannot move a Q register
to a Q register directly. We need a scratch. */
if (REG_P (x)
- && (mode == TFmode || mode == TImode || mode == TDmode)
+ && (mode == TFmode
+ || mode == TImode
+ || mode == TDmode
+ || (vec_flags == VEC_ADVSIMD && known_eq (GET_MODE_SIZE (mode), 16)))
&& mode == GET_MODE (x)
- && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+ && !TARGET_SIMD
+ && FP_REGNUM_P (REGNO (x))
&& reg_class_subset_p (rclass, FP_REGS))
{
sri->icode = code_for_aarch64_reload_mov (mode);
return NO_REGS;
}
+/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
+
+static bool
+aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1,
+ reg_class_t class2)
+{
+ if (!TARGET_SIMD
+ && reg_classes_intersect_p (class1, FP_REGS)
+ && reg_classes_intersect_p (class2, FP_REGS))
+ {
+ /* We can't do a 128-bit FPR-to-FPR move without TARGET_SIMD,
+ so we can't easily split a move involving tuples of 128-bit
+ vectors. Force the copy through memory instead.
+
+ (Tuples of 64-bit vectors are fine.) */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ return true;
+ }
+ return false;
+}
+
static bool
aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
if (VECTOR_MODE_P (mode))
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the instruction have the
same cost as the three vector version - don't add the costs of the
{
/* SUBL2 and SUBW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the sub instruction
have the same cost as the regular three vector version -
{
/* ADDL2 and ADDW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the add instruction
have the same cost as the regular three vector version -
return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ aarch64_register_move_cost (mode, GENERAL_REGS, to);
- if (known_eq (GET_MODE_SIZE (mode), 16))
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags != (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)
+ && known_eq (GET_MODE_SIZE (mode), 16))
{
/* 128-bit operations on general registers require 2 instructions. */
if (from == GENERAL_REGS && to == GENERAL_REGS)
else if (to == GENERAL_REGS)
return regmove_cost->FP2GP;
+ if (!TARGET_SIMD && vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ {
+ /* Needs a round trip through memory, which can use LDP/STP for pairs.
+ The cost must be greater than 2 units to indicate that direct
+ moves aren't possible. */
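+ /* For example, load_fp = store_fp = 4 gives CEIL (4 + 4, 2) = 4. */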
+ auto per_vector = (aarch64_tune_params.memmov_cost.load_fp
+ + aarch64_tune_params.memmov_cost.store_fp);
+ return MIN (CEIL (per_vector, 2), 4);
+ }
+
return regmove_cost->FP2FP;
}
if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
return false;
+ if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD)
+ return false;
+
if (vec_flags & VEC_SVE_PRED)
return aarch64_sve_pred_valid_immediate (op, info);
std::swap (d->op0, d->op1);
}
- if ((d->vec_flags == VEC_ADVSIMD
+ if (((d->vec_flags == VEC_ADVSIMD && TARGET_SIMD)
|| d->vec_flags == VEC_SVE_DATA
|| d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL)
|| d->vec_flags == VEC_SVE_PRED)
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
+#undef TARGET_SECONDARY_MEMORY_NEEDED
+#define TARGET_SECONDARY_MEMORY_NEEDED aarch64_secondary_memory_needed
+
#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask