aarch64: PR target/99195 annotate vector compare patterns for vec-concat-zero

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)

committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 26320f2e3922c0b7d3780ac34b7fef6be51b7dc8..61c815b9a1fc68e7068559a323f09811902378c3 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6615,7 +6615,7 @@
  ;; Note, we have constraints for Dz and Z as different expanders
  ;; have different ideas of what should be passed to this pattern.
  
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
         (neg:<V_INT_EQUIV>
           (COMPARISONS:<V_INT_EQUIV>
@@ -6680,7 +6680,7 @@
  
  ;; cm(hs|hi)
  
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
         (neg:<V_INT_EQUIV>
           (UCOMPARISONS:<V_INT_EQUIV>
@@ -6747,7 +6747,7 @@
  ;; which is rewritten by simplify_rtx as
  ;; plus (eq (and x y) 0) -1.
  
-(define_insn "aarch64_cmtst<mode>"
+(define_insn "aarch64_cmtst<mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
         (plus:<V_INT_EQUIV>
           (eq:<V_INT_EQUIV>
@@ -6766,7 +6766,7 @@
  ;; not (neq (eq x 0)) in which case you rewrite it to
  ;; a comparison against itself
  
-(define_insn "*aarch64_cmtst_same_<mode>"
+(define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
         (plus:<V_INT_EQUIV>
           (eq:<V_INT_EQUIV>
@@ -6817,7 +6817,7 @@
    [(set_attr "type" "neon_tst,multiple")]
  )
  
-(define_insn "*aarch64_cmtstdi"
+(define_insn "*aarch64_cmtstdi<vczle><vczbe>"
    [(set (match_operand:DI 0 "register_operand" "=w")
         (neg:DI
           (ne:DI
@@ -6832,7 +6832,7 @@
  
  ;; fcm(eq|ge|gt|le|lt)
  
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
         (neg:<V_INT_EQUIV>
           (COMPARISONS:<V_INT_EQUIV>
@@ -6850,7 +6850,7 @@
  ;; Note we can also handle what would be fac(le|lt) by
  ;; generating fac(ge|gt).
  
-(define_insn "aarch64_fac<optab><mode>"
+(define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
    [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
         (neg:<V_INT_EQUIV>
           (FAC_COMPARISONS:<V_INT_EQUIV>
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c

new file mode 100644 (file)

index 0000000..86bd729
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c
@@ -0,0 +1,96 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS)                       \
+OT                                                     \
+foo_##OP##_##OS##_##IT##_##IS (IT a, IT b)             \
+{                                                      \
+  IMT zeros = vcreate_##OS (0);                                \
+  return vcombine_##OS (v##OP##_##IS (a, b), zeros);   \
+}
+
+#define FUNC(OT,IT,IMT,IS,OS)          \
+MYOP (OT, IT, IMT, ceq, IS, OS)                \
+MYOP (OT, IT, IMT, clt, IS, OS)                \
+MYOP (OT, IT, IMT, cge, IS, OS)                \
+MYOP (OT, IT, IMT, cle, IS, OS)                \
+MYOP (OT, IT, IMT, cgt, IS, OS)                \
+MYOP (OT, IT, IMT, tst, IS, OS)
+
+#define MYFUNC(PFX, T, S, N, DN)               \
+FUNC (uint##S##x##DN##_t, T##S##x##N##_t, uint##S##x##N##_t, PFX##S, u##S)
+
+MYFUNC (s, int, 8, 8, 16)
+MYFUNC (s, int, 16, 4, 8)
+MYFUNC (s, int, 32, 2, 4)
+MYFUNC (u, uint, 8, 8, 16)
+MYFUNC (u, uint, 16, 4, 8)
+MYFUNC (u, uint, 32, 2, 4)
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)          \
+MYOP (OT, IT, IMT, ceq, IS, OS)                \
+MYOP (OT, IT, IMT, clt, IS, OS)                \
+MYOP (OT, IT, IMT, cge, IS, OS)                \
+MYOP (OT, IT, IMT, cle, IS, OS)                \
+MYOP (OT, IT, IMT, cgt, IS, OS)
+
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)                  \
+MYOP (OT, IT, IMT, cale, IS, OS)               \
+MYOP (OT, IT, IMT, cagt, IS, OS)               \
+MYOP (OT, IT, IMT, calt, IS, OS)               \
+MYOP (OT, IT, IMT, cage, IS, OS)               \
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS)                       \
+OT                                                     \
+foo_##OP##_##OS##_##IT##_z (IT a)                      \
+{                                                      \
+  IMT zeros = vcreate_##OS (0);                                \
+  return vcombine_##OS (v##OP##_##IS (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)                  \
+MYOP (OT, IT, IMT, cltz, IS, OS)               \
+MYOP (OT, IT, IMT, ceqz, IS, OS)               \
+MYOP (OT, IT, IMT, cgez, IS, OS)               \
+MYOP (OT, IT, IMT, cgtz, IS, OS)               \
+MYOP (OT, IT, IMT, clez, IS, OS)               \
+
+MYFUNC (s, int, 8, 8, 16)
+MYFUNC (s, int, 16, 4, 8)
+MYFUNC (s, int, 32, 2, 4)
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Mon, 15 May 2023 08:55:44 +0000 (09:55 +0100)
gcc/config/aarch64/aarch64-simd.md		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c	[new file with mode: 0644]	patch | blob