From 3ed5677bb61b334a2d01c769859cdd3279e12a07 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 10 May 2023 12:00:17 +0100
Subject: [PATCH] aarch64: PR target/99195 annotate simple permutation
 patterns for vec-concat-zero

Another straightforward patch annotating patterns for the zip1, zip2, uzp1, uzp2, rev* instructions, plus tests.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_<PERMUTE:perm_insn><mode>):
	Rename to...
	(aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>): ... This.
	(aarch64_rev<REVERSE:rev_op><mode>): Rename to...
	(aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add tests for zip, uzp and rev
	intrinsics.
---
 gcc/config/aarch64/aarch64-simd.md            |  4 ++--
 .../gcc.target/aarch64/simd/pr99195_1.c       | 20 +++++++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dc6efa0fe815..500d92c05c3e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7990,7 +7990,7 @@
 ;; This instruction's pattern is generated directly by
 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
 ;; need corresponding changes there.
-(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
+(define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
 			  (match_operand:VALL_F16 2 "register_operand" "w")]
@@ -8022,7 +8022,7 @@
 ;; This instruction's pattern is generated directly by
 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
 ;; need corresponding changes there.
-(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
+(define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                     REVERSE))]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 4e6b3412749c..fde501d28e36 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -65,10 +65,18 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn
 OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
+OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2)
+OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2)
+OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2)
+
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
+OPFOUR (uint8, 8, 16, u8, zip1, zip2, uzp1, uzp2)
+OPFOUR (uint16, 4, 8, u16, zip1, zip2, uzp1, uzp2)
+OPFOUR (uint32, 2, 4, u32, zip1, zip2, uzp1, uzp2)
+
 OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
 #define UNARY(OT,IT,OP,S)			\
@@ -81,13 +89,13 @@ foo_##IT##OP##_##S (IT a)                     \
 
 #undef FUNC
 #define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
-OPSEVEN (int8, 8, 16, s8, neg, abs, rbit, clz, cls, cnt, mvn)
-OPFIVE (int16, 4, 8, s16, neg, abs, clz, cls, mvn)
-OPFIVE (int32, 2, 4, s32, neg, abs, clz, cls, mvn)
+OPTEN (int8, 8, 16, s8, neg, abs, rbit, clz, cls, cnt, mvn, rev16, rev32, rev64)
+OPSEVEN (int16, 4, 8, s16, neg, abs, clz, cls, mvn, rev32, rev64)
+OPSIX (int32, 2, 4, s32, neg, abs, clz, cls, mvn, rev64)
 
-OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
-OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
-OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
+OPEIGHT (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn, rev16, rev32, rev64)
+OPFIVE (uint16, 4, 8, u16, clz, cls, mvn, rev32, rev64)
+OPFOUR (uint32, 2, 4, u32, clz, cls, mvn, rev64)
 
 OPTEN (float32, 2, 4, f32, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
-- 
2.47.2