;; Machine description for AArch64 AdvSIMD architecture.
-;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>"
- [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
- (match_operand:VALL_F16 1 "general_operand" ""))]
+ [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
+ (match_operand:VALL_F16 1 "general_operand"))]
"TARGET_SIMD"
"
/* Force the operand into a register if it is not an
normal str, so the check need not apply. */
if (GET_CODE (operands[0]) == MEM
&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
- && ((GET_MODE_SIZE (<MODE>mode) == 16
+ && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
&& aarch64_mem_pair_operand (operands[0], DImode))
- || GET_MODE_SIZE (<MODE>mode) == 8)))
+ || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
operands[1] = force_reg (<MODE>mode, operands[1]);
"
)
(define_expand "movmisalign<mode>"
- [(set (match_operand:VALL 0 "nonimmediate_operand" "")
- (match_operand:VALL 1 "general_operand" ""))]
+ [(set (match_operand:VALL 0 "nonimmediate_operand")
+ (match_operand:VALL 1 "general_operand"))]
"TARGET_SIMD"
{
/* This pattern is not permitted to fail during expansion: if both arguments
)))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_dup<q>")]
)))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_dup<q>")]
case 5: return "fmov\t%d0, %1";
case 6: return "mov\t%0, %1";
case 7:
- return aarch64_output_simd_mov_immediate (operands[1],
- <MODE>mode, 64);
+ return aarch64_output_simd_mov_immediate (operands[1], 64);
default: gcc_unreachable ();
}
}
(define_insn "*aarch64_simd_mov<VQ:mode>"
[(set (match_operand:VQ 0 "nonimmediate_operand"
- "=w, Umq, m, w, ?r, ?w, ?r, w")
+ "=w, Umn, m, w, ?r, ?w, ?r, w")
(match_operand:VQ 1 "general_operand"
"m, Dz, w, w, w, r, r, Dn"))]
"TARGET_SIMD
case 6:
return "#";
case 7:
- return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
+ return aarch64_output_simd_mov_immediate (operands[1], 128);
default:
gcc_unreachable ();
}
(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
(parallel [(match_operand 2 "const_int_operand" "n")])))]
"TARGET_SIMD
- && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
+ && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
"str\\t%<Vetype>1, %0"
[(set_attr "type" "neon_store1_1reg<q>")]
)
-(define_insn "load_pair<mode>"
- [(set (match_operand:VD 0 "register_operand" "=w")
- (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
- (set (match_operand:VD 2 "register_operand" "=w")
- (match_operand:VD 3 "memory_operand" "m"))]
+(define_insn "load_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "register_operand" "=w")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+ (set (match_operand:DREG2 2 "register_operand" "=w")
+ (match_operand:DREG2 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
- GET_MODE_SIZE (<MODE>mode)))"
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
"ldp\\t%d0, %d2, %1"
[(set_attr "type" "neon_ldp")]
)
-(define_insn "store_pair<mode>"
- [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
- (match_operand:VD 1 "register_operand" "w"))
- (set (match_operand:VD 2 "memory_operand" "=m")
- (match_operand:VD 3 "register_operand" "w"))]
+(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+ (match_operand:DREG 1 "register_operand" "w"))
+ (set (match_operand:DREG2 2 "memory_operand" "=m")
+ (match_operand:DREG2 3 "register_operand" "w"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
- GET_MODE_SIZE (<MODE>mode)))"
+ GET_MODE_SIZE (<DREG:MODE>mode)))"
"stp\\t%d1, %d3, %0"
[(set_attr "type" "neon_stp")]
)
+(define_insn "load_pair<VQ:mode><VQ2:mode>"
+ [(set (match_operand:VQ 0 "register_operand" "=w")
+ (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
+ (set (match_operand:VQ2 2 "register_operand" "=w")
+ (match_operand:VQ2 3 "memory_operand" "m"))]
+ "TARGET_SIMD
+ && rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<VQ:MODE>mode)))"
+ "ldp\\t%q0, %q2, %1"
+ [(set_attr "type" "neon_ldp_q")]
+)
+
+(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
+ [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
+ (match_operand:VQ 1 "register_operand" "w"))
+ (set (match_operand:VQ2 2 "memory_operand" "=m")
+ (match_operand:VQ2 3 "register_operand" "w"))]
+ "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<VQ:MODE>mode)))"
+ "stp\\t%q1, %q3, %0"
+ [(set_attr "type" "neon_stp_q")]
+)
+
+
(define_split
[(set (match_operand:VQ 0 "register_operand" "")
(match_operand:VQ 1 "register_operand" ""))]
DONE;
})
-(define_expand "aarch64_split_simd_mov<mode>"
+(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQ 0)
(match_operand:VQ 1))]
"TARGET_SIMD"
{
rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
- rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
- rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+ rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn
(gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
}
)
+;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
+;; fact that their usage need to guarantee that the source vectors are
+;; contiguous. It would be wrong to describe the operation without being able
+;; to describe the permute that is also required, but even if that is done
+;; the permute would have been created as a LOAD_LANES which means the values
+;; in the registers are in the wrong order.
+(define_insn "aarch64_fcadd<rot><mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FCADD))]
+ "TARGET_COMPLEX"
+ "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
+ [(set_attr "type" "neon_fcadd")]
+)
+
+(define_insn "aarch64_fcmla<rot><mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+ (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+ (match_operand:VHSDF 3 "register_operand" "w")]
+ FCMLA)))]
+ "TARGET_COMPLEX"
+ "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
+ [(set_attr "type" "neon_fcmla")]
+)
+
+
+(define_insn "aarch64_fcmla_lane<rot><mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+ (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+ (match_operand:VHSDF 3 "register_operand" "w")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ FCMLA)))]
+ "TARGET_COMPLEX"
+{
+ operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
+ return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
+}
+ [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmla_laneq<rot>v4hf"
+ [(set (match_operand:V4HF 0 "register_operand" "=w")
+ (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
+ (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V8HF 3 "register_operand" "w")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ FCMLA)))]
+ "TARGET_COMPLEX"
+{
+ operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+ return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
+}
+ [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmlaq_lane<rot><mode>"
+ [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
+ (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
+ (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
+ (match_operand:<VHALF> 3 "register_operand" "w")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ FCMLA)))]
+ "TARGET_COMPLEX"
+{
+ int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
+ operands[4]
+ = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
+ return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
+}
+ [(set_attr "type" "neon_fcmla")]
+)
+
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
DOTPROD)))]
"TARGET_DOTPROD"
"<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
- [(set_attr "type" "neon_dot")]
+ [(set_attr "type" "neon_dot<q>")]
)
;; These expands map to the Dot Product optab the vectorizer checks for.
DOTPROD)))]
"TARGET_DOTPROD"
{
- operands[4]
- = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
- [(set_attr "type" "neon_dot")]
+ [(set_attr "type" "neon_dot<q>")]
)
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
DOTPROD)))]
"TARGET_DOTPROD"
{
- operands[4]
- = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
- [(set_attr "type" "neon_dot")]
+ [(set_attr "type" "neon_dot<q>")]
)
(define_expand "copysign<mode>3"
(match_operand:VMUL 3 "register_operand" "w")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
(match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
-(define_insn "aarch64_rsqrte<mode>"
+(define_insn "@aarch64_rsqrte<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
-(define_insn "aarch64_rsqrts<mode>"
+(define_insn "@aarch64_rsqrts<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
[(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
(define_expand "rsqrt<mode>2"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+ [(set (match_operand:VALLF 0 "register_operand")
+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
UNSPEC_RSQRT))]
"TARGET_SIMD"
{
(match_operand:DF 3 "register_operand" "w")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
[(set_attr "type" "neon_fp_mul_d_scalar_q")]
[(set_attr "type" "neon_abs<q>")]
)
-(define_insn "abd<mode>_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS treats always its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "aarch64_<su>abd<mode>_3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
- (abs:VDQ_BHSI (minus:VDQ_BHSI
- (match_operand:VDQ_BHSI 1 "register_operand" "w")
- (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
+ (minus:VDQ_BHSI
+ (USMAX:VDQ_BHSI
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+ (<max_opp>:VDQ_BHSI
+ (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_SIMD"
+ "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_abd<q>")]
+)
+
+(define_insn "aarch64_<sur>abdl2<mode>_3"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+ (match_operand:VDQV_S 2 "register_operand" "w")]
+ ABDL2))]
"TARGET_SIMD"
- "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_abd<q>")]
)
+(define_insn "aarch64_<sur>abal<mode>_4"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+ (match_operand:VDQV_S 2 "register_operand" "w")
+ (match_operand:<VDBLW> 3 "register_operand" "0")]
+ ABAL))]
+ "TARGET_SIMD"
+ "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
+ [(set_attr "type" "neon_arith_acc<q>")]
+)
+
+(define_insn "aarch64_<sur>adalp<mode>_3"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+ (match_operand:<VDBLW> 2 "register_operand" "0")]
+ ADALP))]
+ "TARGET_SIMD"
+ "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
+ [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
+;; inputs in operands 1 and 2. The sequence also has to perform a widening
+;; reduction of the difference into a V4SI vector and accumulate that into
+;; operand 3 before copying that into the result operand 0.
+;; Perform that with a sequence of:
+;; UABDL2 tmp.8h, op1.16b, op2.16b
+;; UABAL tmp.8h, op1.16b, op2.16b
+;; UADALP op3.4s, tmp.8h
+;; MOV op0, op3 // should be eliminated in later passes.
+;;
+;; For TARGET_DOTPROD we do:
+;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
+;; UABD tmp2.16b, op1.16b, op2.16b
+;; UDOT op3.4s, tmp2.16b, tmp1.16b
+;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
+;;
+;; The signed version just uses the signed variants of the above instructions
+;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
+;; unsigned.
+
+(define_expand "<sur>sadv16qi"
+ [(use (match_operand:V4SI 0 "register_operand"))
+ (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
+ (use (match_operand:V16QI 2 "register_operand"))] ABAL)
+ (use (match_operand:V4SI 3 "register_operand"))]
+ "TARGET_SIMD"
+ {
+ if (TARGET_DOTPROD)
+ {
+ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
+ rtx abd = gen_reg_rtx (V16QImode);
+ emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
+ emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
+ abd, ones));
+ DONE;
+ }
+ rtx reduc = gen_reg_rtx (V8HImode);
+ emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
+ operands[2]));
+ emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
+ operands[2], reduc));
+ emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
+ operands[3]));
+ emit_move_insn (operands[0], operands[3]);
+ DONE;
+ }
+)
+
(define_insn "aba<mode>_3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
case 0:
return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
case 1:
- return aarch64_output_simd_mov_immediate (operands[2],
- <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
+ return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
+ AARCH64_CHECK_BIC);
default:
gcc_unreachable ();
}
case 0:
return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
case 1:
- return aarch64_output_simd_mov_immediate (operands[2],
- <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
+ return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
+ AARCH64_CHECK_ORR);
default:
gcc_unreachable ();
}
)
(define_insn "aarch64_simd_vec_set<mode>"
- [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
- (vec_merge:VDQ_BHSI
- (vec_duplicate:VDQ_BHSI
- (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
- (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
+ [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
+ (vec_merge:VALL_F16
+ (vec_duplicate:VALL_F16
+ (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
+ (match_operand:VALL_F16 3 "register_operand" "0,0,0")
(match_operand:SI 2 "immediate_operand" "i,i,i")))]
"TARGET_SIMD"
{
- int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
switch (which_alternative)
{
case 0:
- return "ins\\t%0.<Vetype>[%p2], %w1";
- case 1:
return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+ case 1:
+ return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
case 2:
return "ld1\\t{%0.<Vetype>}[%p2], %1";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
+ [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
- int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
- operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
- int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
+ INTVAL (operands[4]));
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_ins<q>")]
)
+(define_expand "signbit<mode>2"
+ [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+ (use (match_operand:VDQSF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+ int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ shift_amount);
+ operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+ emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+ shift_vector));
+ DONE;
+})
+
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
[(set_attr "type" "neon_shift_imm<q>")]
)
+(define_insn "*aarch64_simd_sra<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+ (plus:VDQ_I
+ (SHIFTRT:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand" "w")
+ (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
+ (match_operand:VDQ_I 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
+ [(set_attr "type" "neon_shift_acc<q>")]
+)
+
(define_insn "aarch64_simd_imm_shl<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
)
(define_expand "ashl<mode>3"
- [(match_operand:VDQ_I 0 "register_operand" "")
- (match_operand:VDQ_I 1 "register_operand" "")
- (match_operand:SI 2 "general_operand" "")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
)
(define_expand "lshr<mode>3"
- [(match_operand:VDQ_I 0 "register_operand" "")
- (match_operand:VDQ_I 1 "register_operand" "")
- (match_operand:SI 2 "general_operand" "")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
)
(define_expand "ashr<mode>3"
- [(match_operand:VDQ_I 0 "register_operand" "")
- (match_operand:VDQ_I 1 "register_operand" "")
- (match_operand:SI 2 "general_operand" "")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
)
(define_expand "vashl<mode>3"
- [(match_operand:VDQ_I 0 "register_operand" "")
- (match_operand:VDQ_I 1 "register_operand" "")
- (match_operand:VDQ_I 2 "register_operand" "")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand" "")
- (match_operand:VDQ_BHSI 1 "register_operand" "")
- (match_operand:VDQ_BHSI 2 "register_operand" "")]
+ [(match_operand:VDQ_BHSI 0 "register_operand")
+ (match_operand:VDQ_BHSI 1 "register_operand")
+ (match_operand:VDQ_BHSI 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
- [(match_operand:DI 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:SI 2 "aarch64_shift_imm64_di")]
"TARGET_SIMD"
{
/* An arithmetic shift right by 64 fills the result with copies of the sign
)
(define_expand "vlshr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand" "")
- (match_operand:VDQ_BHSI 1 "register_operand" "")
- (match_operand:VDQ_BHSI 2 "register_operand" "")]
+ [(match_operand:VDQ_BHSI 0 "register_operand")
+ (match_operand:VDQ_BHSI 1 "register_operand")
+ (match_operand:VDQ_BHSI 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
})
(define_expand "aarch64_lshr_simddi"
- [(match_operand:DI 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:SI 2 "aarch64_shift_imm64_di")]
"TARGET_SIMD"
{
if (INTVAL (operands[2]) == 64)
}
)
-(define_expand "vec_set<mode>"
- [(match_operand:VDQ_BHSI 0 "register_operand")
- (match_operand:<VEL> 1 "register_operand")
- (match_operand:SI 2 "immediate_operand")]
- "TARGET_SIMD"
- {
- HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
- emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
- GEN_INT (elem), operands[0]));
- DONE;
- }
-)
-
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
[(set (match_operand:VD 0 "register_operand" "=w")
[(set_attr "type" "neon_shift_imm")]
)
-(define_insn "aarch64_simd_vec_setv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=w,w")
- (vec_merge:V2DI
- (vec_duplicate:V2DI
- (match_operand:DI 1 "register_operand" "r,w"))
- (match_operand:V2DI 3 "register_operand" "0,0")
- (match_operand:SI 2 "immediate_operand" "i,i")))]
- "TARGET_SIMD"
- {
- int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
- operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
- switch (which_alternative)
- {
- case 0:
- return "ins\\t%0.d[%p2], %1";
- case 1:
- return "ins\\t%0.d[%p2], %1.d[0]";
- default:
- gcc_unreachable ();
- }
- }
- [(set_attr "type" "neon_from_gp, neon_ins_q")]
-)
-
-(define_expand "vec_setv2di"
- [(match_operand:V2DI 0 "register_operand")
- (match_operand:DI 1 "register_operand")
- (match_operand:SI 2 "immediate_operand")]
- "TARGET_SIMD"
- {
- HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
- emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
- GEN_INT (elem), operands[0]));
- DONE;
- }
-)
-
-(define_insn "aarch64_simd_vec_set<mode>"
- [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
- (vec_merge:VDQF_F16
- (vec_duplicate:VDQF_F16
- (match_operand:<VEL> 1 "register_operand" "w"))
- (match_operand:VDQF_F16 3 "register_operand" "0")
- (match_operand:SI 2 "immediate_operand" "i")))]
- "TARGET_SIMD"
- {
- int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
-
- operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
- return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
- }
- [(set_attr "type" "neon_ins<q>")]
-)
-
(define_expand "vec_set<mode>"
- [(match_operand:VDQF_F16 0 "register_operand" "+w")
- (match_operand:<VEL> 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "")]
+ [(match_operand:VALL_F16 0 "register_operand")
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
(match_operand:VDQHS 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
(match_operand:VDQHS 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
(match_operand:VDQHS 3 "register_operand" "w"))))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
(match_operand:VDQHS 3 "register_operand" "w"))))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
(define_expand "<su><maxmin>v2di3"
- [(set (match_operand:V2DI 0 "register_operand" "")
- (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
- (match_operand:V2DI 2 "register_operand" "")))]
+ [(set (match_operand:V2DI 0 "register_operand")
+ (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
+ (match_operand:V2DI 2 "register_operand")))]
"TARGET_SIMD"
{
enum rtx_code cmp_operator;
fmov\\t%d0, %1
dup\\t%d0, %1"
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")
- (set_attr "length" "4")]
+ (set_attr "length" "4")
+ (set_attr "arch" "simd,fp,simd")]
)
(define_insn "move_lo_quad_internal_<mode>"
fmov\\t%d0, %1
dup\\t%d0, %1"
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")
- (set_attr "length" "4")]
+ (set_attr "length" "4")
+ (set_attr "arch" "simd,fp,simd")]
)
(define_insn "move_lo_quad_internal_be_<mode>"
fmov\\t%d0, %1
dup\\t%d0, %1"
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")
- (set_attr "length" "4")]
+ (set_attr "length" "4")
+ (set_attr "arch" "simd,fp,simd")]
)
(define_insn "move_lo_quad_internal_be_<mode>"
fmov\\t%d0, %1
dup\\t%d0, %1"
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")
- (set_attr "length" "4")]
+ (set_attr "length" "4")
+ (set_attr "arch" "simd,fp,simd")]
)
(define_expand "move_lo_quad_<mode>"
)
(define_expand "move_hi_quad_<mode>"
- [(match_operand:VQ 0 "register_operand" "")
- (match_operand:<VHALF> 1 "register_operand" "")]
+ [(match_operand:VQ 0 "register_operand")
+ (match_operand:<VHALF> 1 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
operands[1], p));
)
(define_expand "vec_pack_trunc_<mode>"
- [(match_operand:<VNARROWD> 0 "register_operand" "")
- (match_operand:VDN 1 "register_operand" "")
- (match_operand:VDN 2 "register_operand" "")]
+ [(match_operand:<VNARROWD> 0 "register_operand")
+ (match_operand:VDN 1 "register_operand")
+ (match_operand:VDN 2 "register_operand")]
"TARGET_SIMD"
{
rtx tempreg = gen_reg_rtx (<VDBL>mode);
(match_operand:VQW 2 "vect_par_cnst_lo_half" "")
)))]
"TARGET_SIMD"
- "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
+ "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
[(set_attr "type" "neon_shift_imm_long")]
)
(match_operand:VQW 2 "vect_par_cnst_hi_half" "")
)))]
"TARGET_SIMD"
- "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
+ "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_expand "vec_unpack<su>_hi_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
+ [(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
operands[1], p));
DONE;
)
(define_expand "vec_unpack<su>_lo_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
- (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
operands[1], p));
DONE;
)
(define_expand "vec_widen_<su>mult_lo_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
- (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
- (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
+ (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
operands[1],
operands[2], p));
)
(define_expand "vec_widen_<su>mult_hi_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
- (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
- (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
+ (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
operands[1],
operands[2], p));
)
(define_expand "div<mode>3"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
- (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
- (match_operand:VHSDF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (div:VHSDF (match_operand:VHSDF 1 "register_operand")
+ (match_operand:VHSDF 2 "register_operand")))]
"TARGET_SIMD"
{
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
(match_operand:VDQF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
(match_operand:VDQSF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
(match_operand:DF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
}
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
(define_insn "fnma<mode>4"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF
- (match_operand:VHSDF 1 "register_operand" "w")
- (neg:VHSDF
- (match_operand:VHSDF 2 "register_operand" "w"))
+ (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
+ (match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
(match_operand:VDQF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
(match_operand:VDQSF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
(match_operand:DF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
}
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; other big-endian patterns their behavior is as required.
(define_expand "vec_unpacks_lo_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
- (match_operand:VQ_HSF 1 "register_operand" "")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSF 1 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
operands[1], p));
DONE;
)
(define_expand "vec_unpacks_hi_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "")
- (match_operand:VQ_HSF 1 "register_operand" "")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSF 1 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
operands[1], p));
DONE;
)
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
- [(match_operand:<VDBL> 0 "register_operand" "=w")
- (match_operand:VDF 1 "register_operand" "0")
- (match_operand:<VWIDE> 2 "register_operand" "w")]
+ [(match_operand:<VDBL> 0 "register_operand")
+ (match_operand:VDF 1 "register_operand")
+ (match_operand:<VWIDE> 2 "register_operand")]
"TARGET_SIMD"
{
rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
;; expression like:
;; a = (b < c) ? b : c;
-;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
-;; either explicitly or indirectly via -ffast-math.
+;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
+;; -fno-signed-zeros are enabled either explicitly or indirectly via
+;; -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; 'across lanes' add.
(define_expand "reduc_plus_scal_<mode>"
- [(match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
+ [(match_operand:<VEL> 0 "register_operand")
+ (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
UNSPEC_ADDV)]
"TARGET_SIMD"
{
- rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
UNSPEC_FADDV))]
"TARGET_SIMD"
{
- rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
+ rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
rtx scratch = gen_reg_rtx (V4SFmode);
emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
;; 'across lanes' max and min ops.
;; Template for outputting a scalar, so we can create __builtins which can be
-;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
+;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)]
"TARGET_SIMD"
{
- rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
operands[1]));
MAXMINV)]
"TARGET_SIMD"
{
- rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+ rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
operands[1]));
;; in *aarch64_simd_bsl<mode>_alt.
(define_insn "aarch64_simd_bsl<mode>_internal"
- [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
- (xor:VSDQ_I_DI
- (and:VSDQ_I_DI
- (xor:VSDQ_I_DI
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (xor:VDQ_I
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
- (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
+ (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
+ (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
(match_dup:<V_INT_EQUIV> 3)
))]
"TARGET_SIMD"
;; permutations of commutative operations, we have to have a separate pattern.
(define_insn "*aarch64_simd_bsl<mode>_alt"
- [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
- (xor:VSDQ_I_DI
- (and:VSDQ_I_DI
- (xor:VSDQ_I_DI
- (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
- (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
- (match_dup:VSDQ_I_DI 2)))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 3 "register_operand" "w,w,0")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
+ (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
+ (match_dup:<V_INT_EQUIV> 2)))]
"TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
[(set_attr "type" "neon_bsl<q>")]
)
+;; DImode is special, we want to avoid computing operations which are
+;; more naturally computed in general purpose registers in the vector
+;; registers. If we do that, we need to move all three operands from general
+;; purpose registers to vector registers, then back again. However, we
+;; don't want to make this pattern an UNSPEC as we'd lose scope for
+;; optimizations based on the component operations of a BSL.
+;;
+;; That means we need a splitter back to the individual operations, if they
+;; would be better calculated on the integer side.
+
+(define_insn_and_split "aarch64_simd_bsldi_internal"
+ [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
+ (xor:DI
+ (and:DI
+ (xor:DI
+ (match_operand:DI 3 "register_operand" "w,0,w,r")
+ (match_operand:DI 2 "register_operand" "w,w,0,r"))
+ (match_operand:DI 1 "register_operand" "0,w,w,r"))
+ (match_dup:DI 3)
+ ))]
+ "TARGET_SIMD"
+ "@
+ bsl\\t%0.8b, %2.8b, %3.8b
+ bit\\t%0.8b, %2.8b, %1.8b
+ bif\\t%0.8b, %3.8b, %1.8b
+ #"
+ "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
+{
+ /* Split back to individual operations. If we're before reload, and
+ able to create a temporary register, do so. If we're after reload,
+ we've got an early-clobber destination register, so use that.
+ Otherwise, we can't create pseudos and we can't yet guarantee that
+ operands[0] is safe to write, so FAIL to split. */
+
+ rtx scratch;
+ if (reload_completed)
+ scratch = operands[0];
+ else if (can_create_pseudo_p ())
+ scratch = gen_reg_rtx (DImode);
+ else
+ FAIL;
+
+ emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
+ emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
+ emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
+ DONE;
+}
+ [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
+ (set_attr "length" "4,4,4,12")]
+)
+
+(define_insn_and_split "aarch64_simd_bsldi_alt"
+ [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
+ (xor:DI
+ (and:DI
+ (xor:DI
+ (match_operand:DI 3 "register_operand" "w,w,0,r")
+ (match_operand:DI 2 "register_operand" "w,0,w,r"))
+ (match_operand:DI 1 "register_operand" "0,w,w,r"))
+ (match_dup:DI 2)
+ ))]
+ "TARGET_SIMD"
+ "@
+ bsl\\t%0.8b, %3.8b, %2.8b
+ bit\\t%0.8b, %3.8b, %1.8b
+ bif\\t%0.8b, %2.8b, %1.8b
+ #"
+ "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
+{
+ /* Split back to individual operations. If we're before reload, and
+ able to create a temporary register, do so. If we're after reload,
+ we've got an early-clobber destination register, so use that.
+ Otherwise, we can't create pseudos and we can't yet guarantee that
+ operands[0] is safe to write, so FAIL to split. */
+
+ rtx scratch;
+ if (reload_completed)
+ scratch = operands[0];
+ else if (can_create_pseudo_p ())
+ scratch = gen_reg_rtx (DImode);
+ else
+ FAIL;
+
+ emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
+ emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
+ emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
+ DONE;
+}
+ [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
+ (set_attr "length" "4,4,4,12")]
+)
+
(define_expand "aarch64_simd_bsl<mode>"
[(match_operand:VALLDIF 0 "register_operand")
(match_operand:<V_INT_EQUIV> 1 "register_operand")
break;
}
/* Fall through. */
- case UNGE:
+ case UNLT:
std::swap (operands[2], operands[3]);
/* Fall through. */
- case UNLE:
+ case UNGT:
case GT:
comparison = gen_aarch64_cmgt<mode>;
break;
break;
}
/* Fall through. */
- case UNGT:
+ case UNLE:
std::swap (operands[2], operands[3]);
/* Fall through. */
- case UNLT:
+ case UNGE:
case GE:
comparison = gen_aarch64_cmge<mode>;
break;
case UNEQ:
case ORDERED:
case UNORDERED:
+ case LTGT:
break;
default:
gcc_unreachable ();
case UNGT:
case UNLE:
case UNLT:
- case NE:
- /* FCM returns false for lanes which are unordered, so if we use
- the inverse of the comparison we actually want to emit, then
- invert the result, we will end up with the correct result.
- Note that a NE NaN and NaN NE b are true for all a, b.
-
- Our transformations are:
- a UNGE b -> !(b GT a)
- a UNGT b -> !(b GE a)
- a UNLE b -> !(a GT b)
- a UNLT b -> !(a GE b)
- a NE b -> !(a EQ b) */
- gcc_assert (comparison != NULL);
- emit_insn (comparison (operands[0], operands[2], operands[3]));
- emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
+ {
+ /* All of the above must not raise any FP exceptions. Thus we first
+ check each operand for NaNs and force any elements containing NaN to
+ zero before using them in the compare.
+ Example: UN<cc> (a, b) -> UNORDERED (a, b) |
+ (cm<cc> (isnan (a) ? 0.0 : a,
+ isnan (b) ? 0.0 : b))
+ We use the following transformations for doing the comparisions:
+ a UNGE b -> a GE b
+ a UNGT b -> a GT b
+ a UNLE b -> b GE a
+ a UNLT b -> b GT a. */
+
+ rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
+ emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
+ emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
+ emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
+ lowpart_subreg (<V_INT_EQUIV>mode,
+ operands[2],
+ <MODE>mode)));
+ emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
+ lowpart_subreg (<V_INT_EQUIV>mode,
+ operands[3],
+ <MODE>mode)));
+ gcc_assert (comparison != NULL);
+ emit_insn (comparison (operands[0],
+ lowpart_subreg (<MODE>mode,
+ tmp0, <V_INT_EQUIV>mode),
+ lowpart_subreg (<MODE>mode,
+ tmp1, <V_INT_EQUIV>mode)));
+ emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
+ }
break;
case LT:
case GT:
case GE:
case EQ:
+ case NE:
/* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
a GE b -> a GE b
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
- a EQ b -> a EQ b */
+ a EQ b -> a EQ b
+ a NE b -> ~(a EQ b) */
gcc_assert (comparison != NULL);
emit_insn (comparison (operands[0], operands[2], operands[3]));
+ if (code == NE)
+ emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
break;
- case UNEQ:
- /* We first check (a > b || b > a) which is !UNEQ, inverting
- this result will then give us (a == b || a UNORDERED b). */
+ case LTGT:
+ /* LTGT is not guranteed to not generate a FP exception. So let's
+ go the faster way : ((a > b) || (b > a)). */
emit_insn (gen_aarch64_cmgt<mode> (operands[0],
operands[2], operands[3]));
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
- emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
- break;
-
- case UNORDERED:
- /* Operands are ORDERED iff (a > b || b >= a), so we can compute
- UNORDERED as !ORDERED. */
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
- emit_insn (gen_aarch64_cmge<mode> (operands[0],
- operands[3], operands[2]));
- emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
- emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
break;
case ORDERED:
- emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
- emit_insn (gen_aarch64_cmge<mode> (operands[0],
- operands[3], operands[2]));
- emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
+ case UNORDERED:
+ case UNEQ:
+ /* cmeq (a, a) & cmeq (b, b). */
+ emit_insn (gen_aarch64_cmeq<mode> (operands[0],
+ operands[2], operands[2]));
+ emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
+ emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
+
+ if (code == UNORDERED)
+ emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
+ else if (code == UNEQ)
+ {
+ emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
+ emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
+ }
break;
default:
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(sign_extend:GPI
- (vec_select:<VEL>
+ (vec_select:<VDQQH:VEL>
(match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+ INTVAL (operands[2]));
return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
}
- [(set_attr "type" "neon_to_gp<q>")]
+ [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
-(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI
- (vec_select:<VEL>
+(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (zero_extend:GPI
+ (vec_select:<VDQQH:VEL>
(match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
- return "umov\\t%w0, %1.<Vetype>[%2]";
+ operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+ INTVAL (operands[2]));
+ return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
}
- [(set_attr "type" "neon_to_gp<q>")]
+ [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
(define_insn "aarch64_get_lane<mode>"
- [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
(vec_select:<VEL>
(match_operand:VALL_F16 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
switch (which_alternative)
{
case 0:
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
+(define_insn "load_pair_lanes<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "memory_operand" "Utq")
+ (match_operand:VDC 2 "memory_operand" "m")))]
+ "TARGET_SIMD && !STRICT_ALIGNMENT
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
+ "ldr\\t%q0, %1"
+ [(set_attr "type" "neon_load1_1reg_q")]
+)
+
+(define_insn "store_pair_lanes<mode>"
+ [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "register_operand" "w, r")
+ (match_operand:VDC 2 "register_operand" "w, r")))]
+ "TARGET_SIMD"
+ "@
+ stp\\t%d1, %d2, %y0
+ stp\\t%x1, %x2, %y0"
+ [(set_attr "type" "neon_stp, store_16")]
+)
+
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
-(define_insn "*aarch64_combinez<mode>"
+(define_insn "@aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
- (vec_concat:<VDBL>
- (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "general_operand" "w,?r,m")
+ (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
mov\\t%0.8b, %1.8b
fmov\t%d0, %1
ldr\\t%d0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")]
+ (set_attr "arch" "simd,fp,simd")]
)
-(define_insn "*aarch64_combinez_be<mode>"
+(define_insn "@aarch64_combinez_be<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
- (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
+ (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
+ (match_operand:VDC 1 "general_operand" "w,?r,m")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
mov\\t%0.8b, %1.8b
fmov\t%d0, %1
ldr\\t%d0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")]
+ (set_attr "arch" "simd,fp,simd")]
)
(define_expand "aarch64_combine<mode>"
}
)
-(define_expand "aarch64_simd_combine<mode>"
+(define_expand "@aarch64_simd_combine<mode>"
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "register_operand")
(match_operand:VDC 2 "register_operand")]
(define_expand "aarch64_saddl2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQW 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQW 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_uaddl2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQW 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQW 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_ssubl2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQW 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQW 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_usubl2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQW 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQW 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
;; <su><addsub>w<q>.
(define_expand "widen_ssum<mode>3"
- [(set (match_operand:<VDBLW> 0 "register_operand" "")
+ [(set (match_operand:<VDBLW> 0 "register_operand")
(plus:<VDBLW> (sign_extend:<VDBLW>
- (match_operand:VQW 1 "register_operand" ""))
- (match_operand:<VDBLW> 2 "register_operand" "")))]
+ (match_operand:VQW 1 "register_operand"))
+ (match_operand:<VDBLW> 2 "register_operand")))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
)
(define_expand "widen_ssum<mode>3"
- [(set (match_operand:<VWIDE> 0 "register_operand" "")
+ [(set (match_operand:<VWIDE> 0 "register_operand")
(plus:<VWIDE> (sign_extend:<VWIDE>
- (match_operand:VD_BHSI 1 "register_operand" ""))
- (match_operand:<VWIDE> 2 "register_operand" "")))]
+ (match_operand:VD_BHSI 1 "register_operand"))
+ (match_operand:<VWIDE> 2 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
})
(define_expand "widen_usum<mode>3"
- [(set (match_operand:<VDBLW> 0 "register_operand" "")
+ [(set (match_operand:<VDBLW> 0 "register_operand")
(plus:<VDBLW> (zero_extend:<VDBLW>
- (match_operand:VQW 1 "register_operand" ""))
- (match_operand:<VDBLW> 2 "register_operand" "")))]
+ (match_operand:VQW 1 "register_operand"))
+ (match_operand:<VDBLW> 2 "register_operand")))]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
)
(define_expand "widen_usum<mode>3"
- [(set (match_operand:<VWIDE> 0 "register_operand" "")
+ [(set (match_operand:<VWIDE> 0 "register_operand")
(plus:<VWIDE> (zero_extend:<VWIDE>
- (match_operand:VD_BHSI 1 "register_operand" ""))
- (match_operand:<VWIDE> 2 "register_operand" "")))]
+ (match_operand:VD_BHSI 1 "register_operand"))
+ (match_operand:<VWIDE> 2 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
DONE;
})
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (match_operand:VD_BHSI 2 "register_operand" "w"))))]
+ (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_BHSI 2 "register_operand" "w"))))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_sub_widen")]
)
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+ (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+ [(set_attr "type" "neon_sub_widen")]
)
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
- (ANY_EXTEND:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+ (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_sub_widen")]
+)
+
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_add_widen")]
+)
+
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
- [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+ "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+ [(set_attr "type" "neon_add_widen")]
+)
+
+(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (match_operand:<VWIDE> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_add_widen")]
)
(define_expand "aarch64_saddw2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_uaddw2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
(define_expand "aarch64_ssubw2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_usubw2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQW 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
;; <su><r>h<addsub>.
+(define_expand "<u>avg<mode>3_floor"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand")
+ (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
+ (match_operand:VDQ_BHSI 2 "register_operand")]
+ HADD))]
+ "TARGET_SIMD"
+)
+
+(define_expand "<u>avg<mode>3_ceil"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand")
+ (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
+ (match_operand:VDQ_BHSI 2 "register_operand")]
+ RHADD))]
+ "TARGET_SIMD"
+)
+
(define_insn "aarch64_<sur>h<addsub><mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
UNSPEC_FMULX))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
- INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
UNSPEC_FMULX))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_fp_mul_<Vetype><q>")]
UNSPEC_FMULX))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
}
[(set_attr "type" "fmul<Vetype>")]
VQDMULH))]
"TARGET_SIMD"
"*
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
VQDMULH))]
"TARGET_SIMD"
"*
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
VQDMULH))]
"TARGET_SIMD"
"*
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
VQDMULH))]
"TARGET_SIMD"
"*
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
}
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
}
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
)
(define_expand "aarch64_sqdmlal2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:VQ_HSI 3 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
operands[2], operands[3], p));
DONE;
})
(define_expand "aarch64_sqdmlsl2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:VQ_HSI 3 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
operands[2], operands[3], p));
DONE;
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
(const_int 1))))]
"TARGET_SIMD"
{
- operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
)
(define_expand "aarch64_sqdmlal2_lane<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCOND> 3 "register_operand" "<vwx>")
- (match_operand:SI 4 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VCOND> 3 "register_operand")
+ (match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
})
(define_expand "aarch64_sqdmlal2_laneq<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
- (match_operand:SI 4 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VCONQ> 3 "register_operand")
+ (match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
})
(define_expand "aarch64_sqdmlsl2_lane<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCOND> 3 "register_operand" "<vwx>")
- (match_operand:SI 4 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VCOND> 3 "register_operand")
+ (match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
})
(define_expand "aarch64_sqdmlsl2_laneq<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
- (match_operand:SI 4 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VCONQ> 3 "register_operand")
+ (match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
)
(define_expand "aarch64_sqdmlal2_n<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VEL> 3 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VEL> 3 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
})
(define_expand "aarch64_sqdmlsl2_n<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VEL> 3 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:<VWIDE> 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")
+ (match_operand:<VEL> 3 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_sqdmull2<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSI 1 "register_operand")
+ (match_operand:VQ_HSI 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
(const_int 1)))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_sqdmull2_lane<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCOND> 2 "register_operand" "<vwx>")
- (match_operand:SI 3 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSI 1 "register_operand")
+ (match_operand:<VCOND> 2 "register_operand")
+ (match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
})
(define_expand "aarch64_sqdmull2_laneq<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
- (match_operand:SI 3 "immediate_operand" "i")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSI 1 "register_operand")
+ (match_operand:<VCONQ> 2 "register_operand")
+ (match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
)
(define_expand "aarch64_sqdmull2_n<mode>"
- [(match_operand:<VWIDE> 0 "register_operand" "=w")
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VEL> 2 "register_operand" "w")]
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (match_operand:VQ_HSI 1 "register_operand")
+ (match_operand:<VEL> 2 "register_operand")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
- "reload_completed"
+ "&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(COMPARISONS:DI
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
- "reload_completed"
+ "&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(UCOMPARISONS:DI
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
- "reload_completed"
+ "&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(ne:DI
;; sqrt
(define_expand "sqrt<mode>2"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
- (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
"TARGET_SIMD"
{
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
UNSPEC_LD2_LANE))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load2_one_lane")]
)
(define_expand "vec_load_lanesoi<mode>"
- [(set (match_operand:OI 0 "register_operand" "=w")
- (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+ [(set (match_operand:OI 0 "register_operand")
+ (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_LD2))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (OImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
}
UNSPEC_ST2_LANE))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
}
[(set_attr "type" "neon_store2_one_lane<q>")]
)
(define_expand "vec_store_lanesoi<mode>"
- [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:OI [(match_operand:OI 1 "register_operand" "w")
+ [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
+ (unspec:OI [(match_operand:OI 1 "register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_ST2))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (OImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
}
UNSPEC_LD3_LANE))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load3_one_lane")]
)
(define_expand "vec_load_lanesci<mode>"
- [(set (match_operand:CI 0 "register_operand" "=w")
- (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
+ [(set (match_operand:CI 0 "register_operand")
+ (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_LD3))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (CImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
}
UNSPEC_ST3_LANE))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
}
[(set_attr "type" "neon_store3_one_lane<q>")]
)
(define_expand "vec_store_lanesci<mode>"
- [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:CI [(match_operand:CI 1 "register_operand" "w")
+ [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
+ (unspec:CI [(match_operand:CI 1 "register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_ST3))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (CImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
}
UNSPEC_LD4_LANE))]
"TARGET_SIMD"
{
- operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load4_one_lane")]
)
(define_expand "vec_load_lanesxi<mode>"
- [(set (match_operand:XI 0 "register_operand" "=w")
- (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+ [(set (match_operand:XI 0 "register_operand")
+ (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_LD4))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (XImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
}
UNSPEC_ST4_LANE))]
"TARGET_SIMD"
{
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
}
[(set_attr "type" "neon_store4_one_lane<q>")]
)
(define_expand "vec_store_lanesxi<mode>"
- [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:XI [(match_operand:XI 1 "register_operand" "w")
+ [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
+ (unspec:XI [(match_operand:XI 1 "register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_ST4))]
"TARGET_SIMD"
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (XImode);
- rtx mask = aarch64_reverse_mask (<MODE>mode);
+ rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
}
;; Reload patterns for AdvSIMD register list operands.
(define_expand "mov<mode>"
- [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
- (match_operand:VSTRUCT 1 "general_operand" ""))]
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
+ (match_operand:VSTRUCT 1 "general_operand"))]
"TARGET_SIMD"
{
if (can_create_pseudo_p ())
}
})
+
+(define_expand "aarch64_ld1x3<VALLDIF:mode>"
+ [(match_operand:CI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ rtx mem = gen_rtx_MEM (CImode, operands[1]);
+ emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
+ DONE;
+})
+
+(define_insn "aarch64_ld1_x3_<mode>"
+ [(set (match_operand:CI 0 "register_operand" "=w")
+ (unspec:CI
+ [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
+ (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
+ "TARGET_SIMD"
+ "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
+ [(set_attr "type" "neon_load1_3reg<q>")]
+)
+
+(define_expand "aarch64_ld1x4<VALLDIF:mode>"
+ [(match_operand:XI 0 "register_operand" "=w")
+ (match_operand:DI 1 "register_operand" "r")
+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ rtx mem = gen_rtx_MEM (XImode, operands[1]);
+ emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
+ DONE;
+})
+
+(define_insn "aarch64_ld1_x4_<mode>"
+ [(set (match_operand:XI 0 "register_operand" "=w")
+ (unspec:XI
+ [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_LD1))]
+ "TARGET_SIMD"
+ "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
+ [(set_attr "type" "neon_load1_4reg<q>")]
+)
+
+(define_expand "aarch64_st1x2<VALLDIF:mode>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:OI 1 "register_operand")
+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ rtx mem = gen_rtx_MEM (OImode, operands[0]);
+ emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
+ DONE;
+})
+
+(define_insn "aarch64_st1_x2_<mode>"
+ [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:OI
+ [(match_operand:OI 1 "register_operand" "w")
+ (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
+ "TARGET_SIMD"
+ "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
+ [(set_attr "type" "neon_store1_2reg<q>")]
+)
+
+(define_expand "aarch64_st1x3<VALLDIF:mode>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:CI 1 "register_operand")
+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ rtx mem = gen_rtx_MEM (CImode, operands[0]);
+ emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
+ DONE;
+})
+
+(define_insn "aarch64_st1_x3_<mode>"
+ [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:CI
+ [(match_operand:CI 1 "register_operand" "w")
+ (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
+ "TARGET_SIMD"
+ "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
+ [(set_attr "type" "neon_store1_3reg<q>")]
+)
+
+(define_expand "aarch64_st1x4<VALLDIF:mode>"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:XI 1 "register_operand" "")
+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ rtx mem = gen_rtx_MEM (XImode, operands[0]);
+ emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
+ DONE;
+})
+
+(define_insn "aarch64_st1_x4_<mode>"
+ [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:XI
+ [(match_operand:XI 1 "register_operand" "w")
+ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_ST1))]
+ "TARGET_SIMD"
+ "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
+ [(set_attr "type" "neon_store1_4reg<q>")]
+)
+
(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
})
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "w")
+ [(match_operand:VSTRUCT 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
)
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:VSTRUCT 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
})
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:VSTRUCT 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
DONE;
})
+(define_expand "aarch64_ld1x2<VQ:mode>"
+ [(match_operand:OI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ machine_mode mode = OImode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+ emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
+ DONE;
+})
+
+(define_expand "aarch64_ld1x2<VDC:mode>"
+ [(match_operand:OI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ machine_mode mode = OImode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+ emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
+ DONE;
+})
+
+
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
- (match_operand:DI 1 "register_operand" "w")
- (match_operand:VSTRUCT 2 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")
+ [(match_operand:VSTRUCT 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:VSTRUCT 2 "register_operand")
+ (match_operand:SI 3 "immediate_operand")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
* <VSTRUCT:nregs>);
- aarch64_simd_lane_bounds (operands[3], 0,
- GET_MODE_NUNITS (<VALLDIF:MODE>mode),
- NULL);
+ aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
operands[0], mem, operands[2], operands[3]));
DONE;
;; D-register list.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
- [(match_operand:VDC 0 "register_operand" "=w")
- (match_operand:VSTRUCT 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
+ [(match_operand:VDC 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
int part = INTVAL (operands[2]);
;; Q-register list.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
- [(match_operand:VQ 0 "register_operand" "=w")
- (match_operand:VSTRUCT 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
+ [(match_operand:VQ 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
int part = INTVAL (operands[2]);
;; vec_perm support
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VALL_F16 0 "register_operand")
- (match_operand:VALL_F16 1 "register_operand")
- (match_operand:VALL_F16 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3)]
- "TARGET_SIMD"
-{
- if (aarch64_expand_vec_perm_const (operands[0], operands[1],
- operands[2], operands[3]))
- DONE;
- else
- FAIL;
-})
-
(define_expand "vec_perm<mode>"
[(match_operand:VB 0 "register_operand")
(match_operand:VB 1 "register_operand")
"TARGET_SIMD"
{
aarch64_expand_vec_perm (operands[0], operands[1],
- operands[2], operands[3]);
+ operands[2], operands[3], <nunits>);
DONE;
})
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
-(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
+(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
(match_operand:VALL_F16 2 "register_operand" "w")]
PERMUTE))]
"TARGET_SIMD"
- "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_permute<q>")]
)
)
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
- [(match_operand:DI 0 "register_operand" "r")
- (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
})
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
- [(match_operand:DI 0 "register_operand" "r")
- (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
})
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
- [(match_operand:DI 0 "register_operand" "r")
- (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
;; extend them in arm_neon.h and insert the resulting Q-regs.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "+w")
- (match_operand:VSTRUCT 1 "register_operand" "0")
- (match_operand:VQ 2 "register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
+ [(match_operand:VSTRUCT 0 "register_operand")
+ (match_operand:VSTRUCT 1 "register_operand")
+ (match_operand:VQ 2 "register_operand")
+ (match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
int part = INTVAL (operands[3]);
;; Standard pattern name vec_init<mode><Vel>.
(define_expand "vec_init<mode><Vel>"
- [(match_operand:VALL_F16 0 "register_operand" "")
+ [(match_operand:VALL_F16 0 "register_operand")
+ (match_operand 1 "" "")]
+ "TARGET_SIMD"
+{
+ aarch64_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_init<mode><Vhalf>"
+ [(match_operand:VQ_NO2E 0 "register_operand")
(match_operand 1 "" "")]
"TARGET_SIMD"
{
[(set_attr "type" "neon_load1_all_lanes")]
)
-(define_insn "aarch64_frecpe<mode>"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
- (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+(define_insn "aarch64_simd_ld1<mode>_x2"
+ [(set (match_operand:OI 0 "register_operand" "=w")
+ (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_LD1))]
+ "TARGET_SIMD"
+ "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+ [(set_attr "type" "neon_load1_2reg<q>")]
+)
+
+(define_insn "aarch64_simd_ld1<mode>_x2"
+ [(set (match_operand:OI 0 "register_operand" "=w")
+ (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+ (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_LD1))]
+ "TARGET_SIMD"
+ "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+ [(set_attr "type" "neon_load1_2reg<q>")]
+)
+
+
+(define_insn "@aarch64_frecpe<mode>"
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF
+ [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_FRECPE))]
"TARGET_SIMD"
- "frecpe\\t%0.<Vtype>, %1.<Vtype>"
+ "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
-(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
+(define_insn "aarch64_frecpx<mode>"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
- FRECP))]
+ UNSPEC_FRECPX))]
"TARGET_SIMD"
- "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
- [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
+ "frecpx\t%<s>0, %<s>1"
+ [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)
-(define_insn "aarch64_frecps<mode>"
+(define_insn "@aarch64_frecps<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
;; Standard pattern name vec_extract<mode><Vel>.
(define_expand "vec_extract<mode><Vel>"
- [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
- (match_operand:VALL_F16 1 "register_operand" "")
- (match_operand:SI 2 "immediate_operand" "")]
+ [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+ (match_operand:VALL_F16 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
emit_insn
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=w")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
- (match_operand:V16QI 2 "register_operand" "w")]
+ (unspec:V16QI
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
CRYPTO_AES))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"aes<aes_op>\\t%0.16b, %2.16b"
[(set_attr "type" "crypto_aese")]
)
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;; AESE Vn, _
-;; AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=w,w")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
CRYPTO_AESMC))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"aes<aesmc_op>\\t%0.16b, %1.16b"
- [(set_attr "type" "crypto_aesmc")
- (set_attr_alternative "enabled"
- [(if_then_else (match_test
- "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
- (const_string "yes" )
- (const_string "no"))
- (const_string "yes")])]
+ [(set_attr "type" "crypto_aesmc")]
+)
+
+;; When AESE/AESMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves inbetween.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aese_fused"
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
+ (unspec:V16QI
+ [(unspec:V16QI
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
+ UNSPEC_AESE)]
+ UNSPEC_AESMC))]
+ "TARGET_SIMD && TARGET_AES
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
+
+;; When AESD/AESIMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves inbetween.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aesd_fused"
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
+ (unspec:V16QI
+ [(unspec:V16QI
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
+ UNSPEC_AESD)]
+ UNSPEC_AESIMC))]
+ "TARGET_SIMD && TARGET_AES
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
)
;; sha1
(unspec:SI [(match_operand:SI 1
"register_operand" "w")]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 0)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
+ "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 3)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
+ "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA1SU1))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1su1\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha1_fast")]
)
(match_operand:SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA1))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
[(set_attr "type" "crypto_sha1_slow")]
)
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA1SU0))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1su0\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha1_xor")]
)
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA256))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA256SU0))]
- "TARGET_SIMD &&TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256su0\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha256_fast")]
)
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA256SU1))]
- "TARGET_SIMD &&TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256su1\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
+;; sha512
+
+(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")
+ (match_operand:V2DI 3 "register_operand" "w")]
+ CRYPTO_SHA512))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
+
+(define_insn "aarch64_crypto_sha512su0qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")]
+ UNSPEC_SHA512SU0))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512su0\\t%0.2d, %2.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
+
+(define_insn "aarch64_crypto_sha512su1qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")
+ (match_operand:V2DI 3 "register_operand" "w")]
+ UNSPEC_SHA512SU1))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512su1\\t%0.2d, %2.2d, %3.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
+
+;; sha3
+
+(define_insn "eor3q<mode>4"
+ [(set (match_operand:VQ_I 0 "register_operand" "=w")
+ (xor:VQ_I
+ (xor:VQ_I
+ (match_operand:VQ_I 2 "register_operand" "w")
+ (match_operand:VQ_I 3 "register_operand" "w"))
+ (match_operand:VQ_I 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "aarch64_rax1qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (xor:V2DI
+ (rotate:V2DI
+ (match_operand:V2DI 2 "register_operand" "w")
+ (const_int 1))
+ (match_operand:V2DI 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "rax1\\t%0.2d, %1.2d, %2.2d"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "aarch64_xarqv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (rotatert:V2DI
+ (xor:V2DI
+ (match_operand:V2DI 1 "register_operand" "%w")
+ (match_operand:V2DI 2 "register_operand" "w"))
+ (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "xar\\t%0.2d, %1.2d, %2.2d, %3"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "bcaxq<mode>4"
+ [(set (match_operand:VQ_I 0 "register_operand" "=w")
+ (xor:VQ_I
+ (and:VQ_I
+ (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
+ (match_operand:VQ_I 2 "register_operand" "w"))
+ (match_operand:VQ_I 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+;; SM3
+
+(define_insn "aarch64_sm3ss1qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")]
+ UNSPEC_SM3SS1))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
+ [(set_attr "type" "crypto_sm3")]
+)
+
+
+(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")
+ (match_operand:SI 4 "aarch64_imm2" "Ui2")]
+ CRYPTO_SM3TT))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
+ [(set_attr "type" "crypto_sm3")]
+)
+
+(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")]
+ CRYPTO_SM3PART))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
+ [(set_attr "type" "crypto_sm3")]
+)
+
+;; SM4
+
+(define_insn "aarch64_sm4eqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SM4E))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm4e\\t%0.4s, %2.4s"
+ [(set_attr "type" "crypto_sm4")]
+)
+
+(define_insn "aarch64_sm4ekeyqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
+ (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SM4EKEY))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm4ekey\\t%0.4s, %1.4s, %2.4s"
+ [(set_attr "type" "crypto_sm4")]
+)
+
+;; fp16fml
+
+(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand")
+ (match_operand:<VFMLA_W> 2 "register_operand")
+ (match_operand:<VFMLA_W> 3 "register_operand")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
+ <nunits> * 2, false);
+ rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
+ <nunits> * 2, false);
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, p2));
+ DONE;
+
+})
+
+(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand")
+ (match_operand:<VFMLA_W> 2 "register_operand")
+ (match_operand:<VFMLA_W> 3 "register_operand")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
+ rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, p2));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (neg:<VFMLA_SEL_W>
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (neg:<VFMLA_SEL_W>
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+ (match_operand:V4HF 2 "register_operand")
+ (match_operand:V4HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_imm2")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+}
+)
+
+(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+ (match_operand:V4HF 2 "register_operand")
+ (match_operand:V4HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_imm2")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_operand:V8HF 2 "register_operand")
+ (match_operand:V8HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_lane_imm3")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_operand:V8HF 2 "register_operand")
+ (match_operand:V8HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_lane_imm3")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+ (match_operand:V4HF 2 "register_operand")
+ (match_operand:V8HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_lane_imm3")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+
+})
+
+(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+ (match_operand:V4HF 2 "register_operand")
+ (match_operand:V8HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_lane_imm3")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+
+})
+
+(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_operand:V8HF 2 "register_operand")
+ (match_operand:V4HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_imm2")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_operand:V8HF 2 "register_operand")
+ (match_operand:V4HF 3 "register_operand")
+ (match_operand:SI 4 "aarch64_imm2")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
;; pmull
(define_insn "aarch64_crypto_pmulldi"
(unspec:TI [(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "register_operand" "w")]
UNSPEC_PMULL))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"pmull\\t%0.1q, %1.1d, %2.1d"
[(set_attr "type" "crypto_pmull")]
)
(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_PMULL2))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")]
)
+
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
+(define_insn "<optab><Vnarrowq><mode>2"
+ [(set (match_operand:VQN 0 "register_operand" "=w")
+ (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
+ [(set_attr "type" "neon_shift_imm_long")]
+)
+
+;; Truncate a 128-bit integer vector to a 64-bit vector.
+(define_insn "trunc<mode><Vnarrowq>2"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "xtn\t%0.<Vntype>, %1.<Vtype>"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)