;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
+;; The following define_subst rules are used to produce patterns representing
+;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
+;; a vec_concat with zeroes. The order of the vec_concat operands differs
+;; for big-endian so we have a separate define_subst rule for each endianness.
+(define_subst "add_vec_concat_subst_le"
+ ;; Input template: a set whose destination (operand 0) and source
+ ;; expression (operand 1) both have a VDZ mode.
+ [(set (match_operand:VDZ 0)
+ (match_operand:VDZ 1))]
+ ;; This rule is only enabled for little-endian.
+ "!BYTES_BIG_ENDIAN"
+ ;; Output template: the destination becomes a <VDBL>-mode operand and the
+ ;; original source is reused via match_dup as the FIRST vec_concat operand.
+ ;; The second operand is a new operand 2 that must satisfy
+ ;; aarch64_simd_or_scalar_imm_zero.
+ ;; NOTE(review): VDZ and <VDBL> are defined outside this hunk; presumably
+ ;; the 64-bit vector modes and their double-width counterparts -- confirm.
+ [(set (match_operand:<VDBL> 0)
+ (vec_concat:<VDBL>
+ (match_dup 1)
+ (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
+
+(define_subst "add_vec_concat_subst_be"
+ ;; Input template: a set whose destination (operand 0) and source
+ ;; expression (operand 1) both have a VDZ mode.
+ [(set (match_operand:VDZ 0)
+ (match_operand:VDZ 1))]
+ ;; This rule is only enabled for big-endian.
+ "BYTES_BIG_ENDIAN"
+ ;; Output template: the destination becomes a <VDBL>-mode operand.  The
+ ;; vec_concat operand order is the reverse of the little-endian rule: the
+ ;; new zero operand 2 (aarch64_simd_or_scalar_imm_zero) comes FIRST and the
+ ;; original source, reused via match_dup, comes second.
+ ;; NOTE(review): VDZ and <VDBL> are defined outside this hunk; presumably
+ ;; the 64-bit vector modes and their double-width counterparts -- confirm.
+ [(set (match_operand:<VDBL> 0)
+ (vec_concat:<VDBL>
+ (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
+ (match_dup 1)))])
+
+;; The subst_attr definitions used to annotate patterns further in the file.
+;; Patterns that need to have the above substitutions added to them should
+;; have <vczle><vczbe> added to their name.
+;; The arguments of each define_subst_attr are, in order: the attribute name,
+;; the define_subst rule it is tied to, the text used in the pattern name
+;; when the rule is not applied (empty here), and the text used when the
+;; rule is applied.
+(define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
+(define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
+
(define_expand "mov<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
[(set_attr "type" "neon_logic<q>")]
)
-(define_insn "add<mode>3"
+(define_insn "add<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
[(set_attr "type" "neon_add<q>")]
)
-(define_insn "sub<mode>3"
+(define_insn "sub<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
[(set_attr "type" "neon_sub<q>")]
)
-(define_insn "mul<mode>3"
+(define_insn "mul<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")))]
)
;; For AND (vector, register) and BIC (vector, immediate)
-(define_insn "and<mode>3"
+(define_insn "and<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
)
;; For ORR (vector, register) and ORR (vector, immediate)
-(define_insn "ior<mode>3"
+(define_insn "ior<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
[(set_attr "type" "neon_logic<q>")]
)
-(define_insn "xor<mode>3"
+(define_insn "xor<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
--- /dev/null
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+ the top half of the vector register and no explicit zeroing instructions
+ are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+/* Define a function foo_<OP>_<S> that takes two vectors of type IT and
+   returns a vector of type OT.  It applies the v<OP>_<S> intrinsic to A and
+   B, then passes that result as the first argument of vcombine_<S> and a
+   zero vector built with vcreate_<S> (0) as the second argument.  */
+#define ONE(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b), zeros); \
+}
+
+/* Build the vector type names by token pasting -- output type T x OS _t and
+   input type T x IS _t -- and instantiate ONE for operation OP with
+   intrinsic suffix S.  */
+#define FUNC(T,IS,OS,OP,S) ONE (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+/* OPTWO .. OPSIX instantiate FUNC for two to six operations that share the
+   same element type T, lane counts IS/OS and intrinsic suffix S.  OPTWO
+   emits both FUNC instances itself; each larger macro emits FUNC for its
+   first operation and hands the remaining ones to the next-smaller macro.  */
+#define OPTWO(T,IS,OS,S,OP1,OP2) \
+FUNC (T, IS, OS, OP1, S) \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3) \
+FUNC (T, IS, OS, OP1, S) \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4) \
+FUNC (T, IS, OS, OP1, S) \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+#define OPFIVE(T,IS,OS,S,OP1,OP2,OP3,OP4, OP5) \
+FUNC (T, IS, OS, OP1, S) \
+OPFOUR (T, IS, OS, S, OP2, OP3, OP4, OP5)
+
+#define OPSIX(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6) \
+FUNC (T, IS, OS, OP1, S) \
+OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
+
+/* Generate the add, sub, mul, and, orr and eor test functions for signed
+   and unsigned 8-, 16- and 32-bit elements.  The input vectors have 8, 4
+   and 2 lanes respectively and the combined results have twice as many.  */
+OPSIX (int8, 8, 16, s8, add, sub, mul, and, orr, eor)
+OPSIX (int16, 4, 8, s16, add, sub, mul, and, orr, eor)
+OPSIX (int32, 2, 4, s32, add, sub, mul, and, orr, eor)
+
+OPSIX (uint8, 8, 16, u8, add, sub, mul, and, orr, eor)
+OPSIX (uint16, 4, 8, u16, add, sub, mul, and, orr, eor)
+OPSIX (uint32, 2, 4, u32, add, sub, mul, and, orr, eor)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+