From: Kyrylo Tkachov
Date: Wed, 10 May 2023 09:40:06 +0000 (+0100)
Subject: aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
X-Git-Tag: basepoints/gcc-15~9488
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1e7f9993084b87e6676a5ccef3c8b7f807a6013;p=thirdparty%2Fgcc.git

aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero

This patch cleans up some almost-duplicate patterns for the XTN, SQXTN,
UQXTN instructions.  Using the <vczle><vczbe> attributes we can remove the
BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN cases, as well as the intrinsic
expanders that select between the two.  Tests are also added.
Thankfully the diffstat comes out negative \O/.
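
For illustration only (not part of the committed patch), this is the shape of
source the vec-concat-zero annotation is meant to catch, modelled on the MYOP
macro added to pr99195_4.c below; the function name is made up and the
expected codegen assumes the annotation applies as described:

  #include <arm_neon.h>

  /* Narrow and concatenate with zeros: with the annotated patterns this
     should collapse to a single sqxtn, with no extra fmov/mov to clear
     the upper half of the result register.  */
  int16x8_t
  foo_qmovn_s16 (int32x4_t a)
  {
    int16x4_t zeros = vcreate_s16 (0);
    return vcombine_s16 (vqmovn_s32 (a), zeros);
  }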

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

        PR target/99195
        * config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
        (aarch64_xtn<mode>_insn_be): Likewise.
        (trunc<mode><Vnarrowq>2): Rename to...
        (trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
        (aarch64_xtn<mode>): Move under the above.  Just emit the truncate RTL.
        (aarch64_<su>qmovn<mode>): Likewise.
        (aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
        (aarch64_<su>qmovn<mode>_insn_le): Delete.
        (aarch64_<su>qmovn<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

        PR target/99195
        * gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 46038889573a..9ad0489f79a7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1743,47 +1743,6 @@
 
 ;; Narrowing operations.
 
-(define_insn "aarch64_xtn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
-  "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -5300,7 +5259,7 @@
 
 ;; sqmovn and uqmovn
 
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (SAT_TRUNC:<VNARROWQ>
           (match_operand:SD_HSDI 1 "register_operand" "w")))]
@@ -5309,48 +5268,13 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (SAT_TRUNC:<VNARROWQ>
-          (match_operand:VQN 1 "register_operand")))]
+          (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -9281,7 +9205,7 @@
 )
 
 ;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -9289,6 +9213,15 @@
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {}
+)
+
 (define_insn "aarch64_bfdot<mode>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
         (plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b6a972..6127cb26781b 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b) \
   return vcombine_##OS (v##OP##_##IS (a, b), zeros); \
 }
 
-
 #define FUNC(OT,IT,IMT,IS,OS) \
 MYOP (OT, IT, IMT, addhn, IS, OS) \
 MYOP (OT, IT, IMT, subhn, IS, OS) \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
 FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
 FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
 
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+  IMT zeros = vcreate_##OS (0); \
+  return vcombine_##OS (v##OP##_##IS (a), zeros); \
+}
+
+#undef FUNC
+#define FUNC(OP) \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8) \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16) \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32) \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8) \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16) \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32) \
+
+FUNC (movn)
+FUNC (qmovn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } } */
 /* { dg-final { scan-assembler-not {\tmov\t} } } */