From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 24 May 2023 13:52:34 +0000 (+0100)
Subject: aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero
X-Git-Tag: basepoints/gcc-15~8972
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e;p=thirdparty%2Fgcc.git

aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero

Continuing the series of straightforward annotations, this one handles the normal (not widening or narrowing) vector shifts.
Tests included.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_simd_lshr<mode>): Rename to...
	(aarch64_simd_lshr<mode><vczle><vczbe>): ... This.
	(aarch64_simd_ashr<mode>): Rename to...
	(aarch64_simd_ashr<mode><vczle><vczbe>): ... This.
	(aarch64_simd_imm_shl<mode>): Rename to...
	(aarch64_simd_imm_shl<mode><vczle><vczbe>): ... This.
	(aarch64_simd_reg_sshl<mode>): Rename to...
	(aarch64_simd_reg_sshl<mode><vczle><vczbe>): ... This.
	(aarch64_simd_reg_shl<mode>_unsigned): Rename to...
	(aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>): ... This.
	(aarch64_simd_reg_shl<mode>_signed): Rename to...
	(aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>): ... This.
	(vec_shr_<mode>): Rename to...
	(vec_shr_<mode><vczle><vczbe>): ... This.
	(aarch64_<sur>shl<mode>): Rename to...
	(aarch64_<sur>shl<mode><vczle><vczbe>): ... This.
	(aarch64_<sur>q<r>shl<mode>): Rename to...
	(aarch64_<sur>q<r>shl<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add testing for shifts.
	* gcc.target/aarch64/simd/pr99195_6.c: Likewise.
	* gcc.target/aarch64/simd/pr99195_8.c: New test.
---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index af95bbb29a7b..0df97310fd98 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1280,7 +1280,7 @@
   DONE;
 })
 
-(define_insn "aarch64_simd_lshr<mode>"
+(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
@@ -1289,7 +1289,7 @@
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
-(define_insn "aarch64_simd_ashr<mode>"
+(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "D1,Dr")))]
@@ -1312,7 +1312,7 @@
   [(set_attr "type" "neon_shift_acc<q>")]
 )
 
-(define_insn "aarch64_simd_imm_shl<mode>"
+(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
@@ -1321,7 +1321,7 @@
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
-(define_insn "aarch64_simd_reg_sshl<mode>"
+(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
@@ -1330,7 +1330,7 @@
   [(set_attr "type" "neon_shift_reg<q>")]
 )
 
-(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
+(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 		    (match_operand:VDQ_I 2 "register_operand" "w")]
@@ -1340,7 +1340,7 @@
   [(set_attr "type" "neon_shift_reg<q>")]
 )
 
-(define_insn "aarch64_simd_reg_shl<mode>_signed"
+(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 		    (match_operand:VDQ_I 2 "register_operand" "w")]
@@ -1522,7 +1522,7 @@
 )
 
 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
-(define_insn "vec_shr_<mode>"
+(define_insn "vec_shr_<mode><vczle><vczbe>"
   [(set (match_operand:VD 0 "register_operand" "=w")
         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
 		    (match_operand:SI 2 "immediate_operand" "i")]
@@ -6340,7 +6340,7 @@
 
 ;; vshl
 
-(define_insn "aarch64_<sur>shl<mode>"
+(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
         (unspec:VSDQ_I_DI
 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
@@ -6354,7 +6354,7 @@
 
 ;; vqshl
 
-(define_insn "aarch64_<sur>q<r>shl<mode>"
+(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
         (unspec:VSDQ_I
 	  [(match_operand:VSDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index fde501d28e36..8b6548a154fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -65,9 +65,9 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn
 OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2)
-OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2)
-OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2)
+OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
 
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
index 52ad2709400a..c86506e96d1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -25,6 +25,16 @@ MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
 MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
 MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
 
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, shl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, shl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, shl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, shl, u64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, qshl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, qshl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, qshl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, qshl, u64)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
new file mode 100644
index 000000000000..29499e71df68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
@@ -0,0 +1,39 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,OP,S,OS)					\
+OT								\
+foo_##OP##_##S##OS (IT1 a)					\
+{								\
+  IT1 zeros = vcreate_##S##OS (0);				\
+  return vcombine_##S##OS (v##OP##_##S##OS (a, 3), zeros);	\
+}								\
+OT								\
+foo_##OP##_##S##OS##_s (IT1 a)					\
+{								\
+  IT1 zeros = vcreate_##S##OS (0);				\
+  return vcombine_##S##OS (v##OP##_##S##OS (a, OS - 1), zeros);	\
+}
+
+MYOP (int8x16_t, int8x8_t, shr_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shr_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shr_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shr_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shr_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shr_n, u, 32)
+MYOP (int8x16_t, int8x8_t, shl_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shl_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shl_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shl_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shl_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shl_n, u, 32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+