From 889a0791c632aa2804c4e01cc7dddca1ae0d229c Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Fri, 28 Apr 2023 09:33:16 +0100
Subject: [PATCH] aarch64: PR target/99195 annotate more integer unary patterns
 for vec-concat with zero

Annotate more of the straightforward patterns and add tests for them, this time for simple integer unary ops.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): Rename to...
	(aarch64_rbit<mode><vczle><vczbe>): ... This.
	(neg<mode>2): Rename to...
	(neg<mode>2<vczle><vczbe>): ... This.
	(abs<mode>2): Rename to...
	(abs<mode>2<vczle><vczbe>): ... This.
	(aarch64_abs<mode>): Rename to...
	(aarch64_abs<mode><vczle><vczbe>): ... This.
	(one_cmpl<mode>2): Rename to...
	(one_cmpl<mode>2<vczle><vczbe>): ... This.
	(clrsb<mode>2): Rename to...
	(clrsb<mode>2<vczle><vczbe>): ... This.
	(clz<mode>2): Rename to...
	(clz<mode>2<vczle><vczbe>): ... This.
	(popcount<mode>2): Rename to...
	(popcount<mode>2<vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add tests for unary integer ops.
---
 gcc/config/aarch64/aarch64-simd.md            | 16 ++++++------
 .../gcc.target/aarch64/simd/pr99195_1.c       | 26 +++++++++++++++++--
 2 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1e7295428054..0a1f12cc3d02 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -496,7 +496,7 @@
   [(set_attr "type" "neon_rev<q>")]
 )
 
-(define_insn "aarch64_rbit<mode>"
+(define_insn "aarch64_rbit<mode><vczle><vczbe>"
   [(set (match_operand:VB 0 "register_operand" "=w")
 	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 		   UNSPEC_RBIT))]
@@ -881,7 +881,7 @@
   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 )
 
-(define_insn "neg<mode>2"
+(define_insn "neg<mode>2<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -889,7 +889,7 @@
   [(set_attr "type" "neon_neg<q>")]
 )
 
-(define_insn "abs<mode>2"
+(define_insn "abs<mode>2<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -900,7 +900,7 @@
 ;; The intrinsic version of integer ABS must not be allowed to
 ;; combine with any operation with an integrated ABS step, such
 ;; as SABD.
-(define_insn "aarch64_abs<mode>"
+(define_insn "aarch64_abs<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 	  (unspec:VSDQ_I_DI
 	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
@@ -1174,7 +1174,7 @@
   [(set_attr "type" "neon_logic<q>")]
 )
 
-(define_insn "one_cmpl<mode>2"
+(define_insn "one_cmpl<mode>2<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -3691,7 +3691,7 @@
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
-(define_insn "clrsb<mode>2"
+(define_insn "clrsb<mode>2<vczle><vczbe>"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -3699,7 +3699,7 @@
   [(set_attr "type" "neon_cls<q>")]
 )
 
-(define_insn "clz<mode>2"
+(define_insn "clz<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
@@ -3707,7 +3707,7 @@
   [(set_attr "type" "neon_cls<q>")]
 )
 
-(define_insn "popcount<mode>2"
+(define_insn "popcount<mode>2<vczle><vczbe>"
   [(set (match_operand:VB 0 "register_operand" "=w")
         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
   "TARGET_SIMD"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 5304e14b573a..96834425d519 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -7,7 +7,7 @@
 
 #include <arm_neon.h>
 
-#define ONE(OT,IT,OP,S)                         \
+#define BINARY(OT,IT,OP,S)                         \
 OT                                              \
 foo_##OP##_##S (IT a, IT b)                     \
 {                                               \
@@ -15,7 +15,7 @@ foo_##OP##_##S (IT a, IT b)                     \
   return vcombine_##S (v##OP##_##S (a, b), zeros);      \
 }
 
-#define FUNC(T,IS,OS,OP,S) ONE (T##x##OS##_t, T##x##IS##_t, OP, S)
+#define FUNC(T,IS,OS,OP,S) BINARY (T##x##OS##_t, T##x##IS##_t, OP, S)
 
 #define OPTWO(T,IS,OS,S,OP1,OP2)        \
 FUNC (T, IS, OS, OP1, S)                \
@@ -37,6 +37,10 @@ OPFOUR (T, IS, OS, S, OP2, OP3, OP4, OP5)
 FUNC (T, IS, OS, OP1, S)                \
 OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
 
+#define OPSEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
+
 #define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
 OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
 OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
@@ -49,6 +53,24 @@ OPELEVEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, m
 OPELEVEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 OPELEVEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 
+#define UNARY(OT,IT,OP,S)			\
+OT                                              \
+foo_##IT##OP##_##S (IT a)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S ((IT) v##OP##_##S (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPSEVEN (int8, 8, 16, s8, neg, abs, rbit, clz, cls, cnt, mvn)
+OPFIVE (int16, 4, 8, s16, neg, abs, clz, cls, mvn)
+OPFIVE (int32, 2, 4, s32, neg, abs, clz, cls, mvn)
+
+OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
+OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
+OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */
 
-- 
2.47.2