;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>"
- [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
- (match_operand:VALL_F16 1 "general_operand"))]
+ [(set (match_operand:VMOVE 0 "nonimmediate_operand")
+ (match_operand:VMOVE 1 "general_operand"))]
"TARGET_FLOAT"
- "
+{
/* Force the operand into a register if it is not an
immediate whose use can be replaced with xzr.
If the mode is 16 bytes wide, then we will be doing
aarch64_expand_vector_init (operands[0], operands[1]);
DONE;
}
- "
-)
+})
(define_expand "movmisalign<mode>"
- [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
- (match_operand:VALL_F16 1 "general_operand"))]
+ [(set (match_operand:VMOVE 0 "nonimmediate_operand")
+ (match_operand:VMOVE 1 "general_operand"))]
"TARGET_FLOAT && !STRICT_ALIGNMENT"
{
/* This pattern is not permitted to fail during expansion: if both arguments
[(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
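+
+;; Duplicate an HF value into both lanes of a V2HF.  Operands 0 and 1 are
+;; tied, so SLI shifts a copy of the 64-bit register left by 16 bits and
+;; inserts it while preserving the low 16 bits: lane 1 receives the
+;; half-float already sitting in lane 0.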
+(define_insn "aarch64_simd_dupv2hf"
+ [(set (match_operand:V2HF 0 "register_operand" "=w")
+ (vec_duplicate:V2HF
+ (match_operand:HF 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "@
+ sli\\t%d0, %d0, 16"
+ [(set_attr "type" "neon_shift_imm")]
+)
+
(define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
(vec_duplicate:VDQF_F16
)
(define_insn "aarch64_dup_lane<mode>"
- [(set (match_operand:VALL_F16 0 "register_operand" "=w")
- (vec_duplicate:VALL_F16
+ [(set (match_operand:VMOVE 0 "register_operand" "=w")
+ (vec_duplicate:VMOVE
(vec_select:<VEL>
- (match_operand:VALL_F16 1 "register_operand" "w")
+ (match_operand:VMOVE 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD"
(set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
)
+(define_insn "*aarch64_simd_movv2hf"
+ [(set (match_operand:V2HF 0 "nonimmediate_operand"
+ "=w, m, m, w, ?r, ?w, ?r, w, w")
+ (match_operand:V2HF 1 "general_operand"
+ "m, Dz, w, w, w, r, r, Dz, Dn"))]
+ "TARGET_SIMD_F16INST
+ && (register_operand (operands[0], V2HFmode)
+ || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
+ "@
+ ldr\\t%s0, %1
+ str\\twzr, %0
+ str\\t%s1, %0
+ mov\\t%0.8b, %1.8b
+ umov\\t%w0, %1.s[0]
+ fmov\\t%s0, %w1
+ mov\\t%w0, %w1
+ movi\\t%d0, 0
+ * return aarch64_output_simd_mov_immediate (operands[1], 32);"
+ [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
+ neon_logic, neon_to_gp, f_mcr,\
+ mov_reg, neon_move, neon_move")]
+)
+
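+;; A minimal C sketch of code this pattern services (the typedef is
+;; illustrative only; assumes a target with +fp16):
+;;
+;;   typedef _Float16 v2hf __attribute__ ((vector_size (4)));
+;;   void copy (v2hf *d, v2hf *s) { *d = *s; }    /* ldr %s / str %s  */
+;;   void zero (v2hf *d) { *d = (v2hf) {0, 0}; }  /* str wzr, [x0]    */
+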
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
"=w, Umn, m, w, ?r, ?w, ?r, w, w")
(define_insn "aarch64_store_lane0<mode>"
[(set (match_operand:<VEL> 0 "memory_operand" "=m")
- (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
+ (vec_select:<VEL> (match_operand:VMOVE 1 "register_operand" "w")
(parallel [(match_operand 2 "const_int_operand" "n")])))]
"TARGET_SIMD
&& ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
)
(define_insn "aarch64_simd_vec_set<mode>"
- [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
- (vec_merge:VALL_F16
- (vec_duplicate:VALL_F16
+ [(set (match_operand:VMOVE 0 "register_operand" "=w,w,w")
+ (vec_merge:VMOVE
+ (vec_duplicate:VMOVE
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
- (match_operand:VALL_F16 3 "register_operand" "0,0,0")
+ (match_operand:VMOVE 3 "register_operand" "0,0,0")
(match_operand:SI 2 "immediate_operand" "i,i,i")))]
"TARGET_SIMD"
{
)
(define_insn "@aarch64_simd_vec_copy_lane<mode>"
- [(set (match_operand:VALL_F16 0 "register_operand" "=w")
- (vec_merge:VALL_F16
- (vec_duplicate:VALL_F16
+ [(set (match_operand:VMOVE 0 "register_operand" "=w")
+ (vec_merge:VMOVE
+ (vec_duplicate:VMOVE
(vec_select:<VEL>
- (match_operand:VALL_F16 3 "register_operand" "w")
+ (match_operand:VMOVE 3 "register_operand" "w")
(parallel
[(match_operand:SI 4 "immediate_operand" "i")])))
- (match_operand:VALL_F16 1 "register_operand" "0")
+ (match_operand:VMOVE 1 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
)
(define_expand "vec_set<mode>"
- [(match_operand:VALL_F16 0 "register_operand")
+ [(match_operand:VMOVE 0 "register_operand")
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+ (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
FMAXMINV)]
"TARGET_SIMD"
{
(define_expand "reduc_<fmaxmin>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+ (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
FMAXMINNMV)]
"TARGET_SIMD"
{
)
(define_insn "aarch64_reduc_<optab>_internal<mode>"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
- (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_P 0 "register_operand" "=w")
+ (unspec:VHSDF_P [(match_operand:VHSDF_P 1 "register_operand" "w")]
FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
(define_insn_and_split "aarch64_get_lane<mode>"
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
(vec_select:<VEL>
- (match_operand:VALL_F16 1 "register_operand" "w, w, w")
+ (match_operand:VMOVE 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
"TARGET_SIMD"
{
;; Standard pattern name vec_init<mode><Vel>.
(define_expand "vec_init<mode><Vel>"
- [(match_operand:VALL_F16 0 "register_operand")
+ [(match_operand:VMOVE 0 "register_operand")
(match_operand 1 "" "")]
"TARGET_SIMD"
{
(define_expand "vec_extract<mode><Vel>"
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
- (match_operand:VALL_F16 1 "register_operand")
+ (match_operand:VMOVE 1 "register_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF])
+;; Advanced SIMD Float modes suitable for reduction or pairwise operations.
+(define_mode_iterator VHSDF_P [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF (V2HF "TARGET_SIMD_F16INST")])
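+;; Including V2HF here (guarded by TARGET_SIMD_F16INST) lets the
+;; reduc_<optab>_scal_<mode> and reduc_<fmaxmin>_scal_<mode> expanders
+;; above also provide v2hf variants.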
;; Advanced SIMD Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
;; Advanced SIMD Float modes with 2 elements.
-(define_mode_iterator V2F [V2SF V2DF])
+(define_mode_iterator V2F [V2SF V2DF (V2HF "TARGET_SIMD_F16INST")])
;; All Advanced SIMD modes on which we support any arithmetic operations.
(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
-;; All Advanced SIMD modes suitable for moving, loading, and storing.
+;; The set of all modes for which vld1 intrinsics are provided.
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including V2HF.
+(define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V4BF V8BF V2SF V4SF V2DF
+ (V2HF "TARGET_SIMD_F16INST")])
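+
+;; For example, the mov<mode> expander above is now written over VMOVE,
+;; so a movv2hf expander is generated alongside the existing modes, with
+;; TARGET_SIMD_F16INST folded into its enabling condition:
+;;
+;;   (define_expand "mov<mode>"
+;;     [(set (match_operand:VMOVE 0 "nonimmediate_operand")
+;;           (match_operand:VMOVE 1 "general_operand"))]
+;;     ...)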
+
;; The VALL_F16 modes except the 128-bit 2-element ones.
(define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF])
(V2SF "2") (V4SF "4")
(V1DF "1") (V2DF "2")
(DI "1") (DF "1")
- (V8DI "8")])
+ (V8DI "8") (V2HF "2")])
;; Map a mode to the number of bits in it, if the size of the mode
;; is constant.
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(V4HI "h") (V8HI "h")
(V2SI "s") (V4SI "s")
- (V2DI "d")
+ (V2DI "d") (V2HF "h")
(V4HF "h") (V8HF "h")
(V2SF "s") (V4SF "s")
(V2DF "d")
;; more accurately.
(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
(V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
- (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
+ (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d") (V2HF "s")
(HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
(SI "s") (DI "d")])
(V4HF "HF") (V8HF "HF")
(V2SF "SF") (V4SF "SF")
(DF "DF") (V2DF "DF")
- (SI "SI") (HI "HI")
- (QI "QI")
+ (SI "SI") (V2HF "HF")
+ (QI "QI") (HI "HI")
(V4BF "BF") (V8BF "BF")
(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
(V2SF "sf") (V4SF "sf")
(V2DF "df") (DF "df")
(SI "si") (HI "hi")
- (QI "qi")
+ (QI "qi") (V2HF "hf")
(V4BF "bf") (V8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
(V4HF "") (V8HF "_q")
(V4BF "") (V8BF "_q")
(V2SF "") (V4SF "_q")
- (V2DF "_q")
+ (V2HF "") (V2DF "_q")
(QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")
(V2x8QI "") (V2x16QI "_q")
(V2x4HI "") (V2x8HI "_q")
(V2SI "p") (V4SI "v")
(V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")
+ (V2HF "p")
(V4HF "v") (V8HF "v")])
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")