}
)
-(define_insn "aarch64_<su>aba<mode>"
+(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (minus:VDQ_BHSI
(USMAX:VDQ_BHSI
)
-(define_insn "aarch64_mla<mode>"
+(define_insn "aarch64_mla<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (mult:VDQ_BHSI
(match_operand:VDQ_BHSI 2 "register_operand" "w")
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
-(define_insn "*aarch64_mla_elt<mode>"
+(define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mla_n<mode>"
+(define_insn "aarch64_mla_n<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mls<mode>"
+(define_insn "aarch64_mls<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
-(define_insn "*aarch64_mls_elt<mode>"
+(define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mls_n<mode>"
+(define_insn "aarch64_mls_n<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 1 "register_operand" "0")
}
)
-(define_insn "fma<mode>4"
+(define_insn "fma<mode>4<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
-(define_insn "*aarch64_fma4_elt<mode>"
+(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(vec_duplicate:VDQF
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(vec_duplicate:VDQSF
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(vec_duplicate:VMUL
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
-(define_insn "fnma<mode>4"
+(define_insn "fnma<mode>4<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
-(define_insn "*aarch64_fnma4_elt<mode>"
+(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(neg:VDQF
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(neg:VDQSF
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(neg:VMUL
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.
-(define_insn "aarch64_simd_bsl<mode>_internal"
+(define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
;; the first. The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.
-(define_insn "*aarch64_simd_bsl<mode>_alt"
+(define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
--- /dev/null
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and no explicit zeroing instructions
+   are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+/* Emit a function of output type OT taking three inputs of type IT, applying
+   the ternary intrinsic v<OP>_<S> and widening the 64-bit result by
+   concatenating a zero vector on top.  The vcombine with zeros should be
+   folded away because the 64-bit operation already clears the high half of
+   the destination register.  */
+#define TERNARY(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+  IT zeros = vcreate_##S (0); \
+  return vcombine_##S (v##OP##_##S (a, b, c), zeros); \
+}
+
+/* Map element type T with input/output lane counts IS/OS to the concrete
+   NEON vector typedefs (e.g. int8x8_t -> int8x16_t).  */
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+/* Helpers to instantiate several operations for the same element type.  */
+#define OPTWO(T,IS,OS,S,OP1,OP2) \
+FUNC (T, IS, OS, OP1, S) \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3) \
+FUNC (T, IS, OS, OP1, S) \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4) \
+FUNC (T, IS, OS, OP1, S) \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+OPTHREE (int8, 8, 16, s8, mla, mls, aba)
+OPTHREE (int16, 4, 8, s16, mla, mls, aba)
+OPTHREE (int32, 2, 4, s32, mla, mls, aba)
+
+OPFOUR (uint8, 8, 16, u8, mla, mls, aba, bsl)
+OPFOUR (uint16, 4, 8, u16, mla, mls, aba, bsl)
+OPFOUR (uint32, 2, 4, u32, mla, mls, aba, bsl)
+
+OPTHREE (float32, 2, 4, f32, mla, fma, fms)
+
+/* Lane-indexed variants: exercise both lane 0 and a non-zero lane (1) of
+   the scalar operand.  Redefine FUNC so the OPTWO/OPTHREE helpers above
+   expand through TERNARY_LANE instead of TERNARY.  */
+#undef FUNC
+#define TERNARY_LANE(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+  IT zeros = vcreate_##S (0); \
+  return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros); \
+} \
+OT \
+foo_##OP##_##S##_lane1 (IT a, IT b, IT c) \
+{ \
+  IT zeros = vcreate_##S (0); \
+  return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros); \
+}
+
+#define FUNC(T,IS,OS,OP,S) TERNARY_LANE (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, mla_lane, mls_lane)
+OPTWO (int32, 2, 4, s32, mla_lane, mls_lane)
+
+OPTWO (uint16, 4, 8, u16, mla_lane, mls_lane)
+OPTWO (uint32, 2, 4, u32, mla_lane, mls_lane)
+
+OPTHREE (float32, 2, 4, f32, mla_lane, fma_lane, fms_lane)
+
+/* No explicit zeroing of the high half may appear: neither an fmov nor a
+   mov should be emitted.  */
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+