This patch goes through the various alphabet-soup saturating multiplication patterns, including those
guarded by TARGET_RDMA, and annotates them with <vczle><vczbe>. Many other patterns in this area are
widening operations that always write a full 128-bit vector, so the annotation doesn't apply to them.
Nothing out of the ordinary in this patch.
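As a rough illustration (not part of the patch), the kind of source that benefits is a 64-bit
saturating multiply whose result is concatenated with a zero vector, mirroring the structure of the
new test below; with the annotated patterns the combiner can see that the 64-bit instruction already
zeroes the top half of the destination, so no separate zeroing mov should be needed. The function
name here is made up for the example:

#include <arm_neon.h>

/* Illustrative only: the vcombine with a zero vector is expected to be
   absorbed into the single sqdmulh, since the 64-bit form of the
   instruction already clears bits 64-127 of the destination register.  */
int16x8_t
qdmulh_zero_high (int16x4_t a, int16x4_t b)
{
  int16x4_t zeros = vcreate_s16 (0);
  return vcombine_s16 (vqdmulh_s16 (a, b), zeros);
}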
Bootstrapped and tested on aarch64-none-linux and aarch64_be-none-elf.
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh<mode>): Rename to...
(aarch64_sq<r>dmulh<mode><vczle><vczbe>): ... This.
(aarch64_sq<r>dmulh_n<mode>): Rename to...
(aarch64_sq<r>dmulh_n<mode><vczle><vczbe>): ... This.
(aarch64_sq<r>dmulh_lane<mode>): Rename to...
(aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>): ... This.
(aarch64_sq<r>dmulh_laneq<mode>): Rename to...
(aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>): ... This.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): Rename to...
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>): ... This.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Rename to...
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>): ... This.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Rename to...
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add tests for qdmulh, qrdmulh.
* gcc.target/aarch64/simd/pr99195_10.c: New test.
;; sq<r>dmulh.
-(define_insn "aarch64_sq<r>dmulh<mode>"
+(define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
(unspec:VSDQ_HSI
[(match_operand:VSDQ_HSI 1 "register_operand" "w")
[(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
-(define_insn "aarch64_sq<r>dmulh_n<mode>"
+(define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
;; sq<r>dmulh_lane
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
;; sqrdml[as]h.
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
(unspec:VSDQ_HSI
[(match_operand:VSDQ_HSI 1 "register_operand" "0")
;; sqrdml[as]h_lane.
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "0")
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "0")
;; sqrdml[as]h_laneq.
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "0")
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "0")
OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPEIGHT (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
+OPEIGHT (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and that no explicit zeroing
+   instructions are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.1-a+rdma" } */
+
+#include <arm_neon.h>
+
+#define OPTWO(T,IS,OS,S,OP1,OP2) \
+FUNC (T, IS, OS, OP1, S) \
+FUNC (T, IS, OS, OP2, S)
+
+#define TERNARY(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c), zeros); \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTWO (int16, 4, 8, s16, qrdmlah, qrdmlsh)
+OPTWO (int32, 2, 4, s32, qrdmlah, qrdmlsh)
+
+#define TERNARY_IDX(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros); \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY_IDX (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, qrdmlah_lane, qrdmlsh_lane)
+OPTWO (int32, 2, 4, s32, qrdmlah_lane, qrdmlsh_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+