aarch64: PR target/99195 annotate simple floating-point patterns for vec-concat with zero

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 3 May 2023 10:15:34 +0000 (11:15 +0100)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 3 May 2023 10:15:34 +0000 (11:15 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 0a1f12cc3d0224996a3250cc4fc9c62e7340f93a..9ba435fd25208855a47ecd1787425030cae96df9 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2846,7 +2846,7 @@
  
  ;; FP arithmetic operations.
  
-(define_insn "add<mode>3"
+(define_insn "add<mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2855,7 +2855,7 @@
    [(set_attr "type" "neon_fp_addsub_<stype><q>")]
  )
  
-(define_insn "sub<mode>3"
+(define_insn "sub<mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                     (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2864,7 +2864,7 @@
    [(set_attr "type" "neon_fp_addsub_<stype><q>")]
  )
  
-(define_insn "mul<mode>3"
+(define_insn "mul<mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2885,7 +2885,7 @@
    operands[1] = force_reg (<MODE>mode, operands[1]);
  })
  
-(define_insn "*div<mode>3"
+(define_insn "*div<mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                  (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2915,7 +2915,7 @@
    }
  )
  
-(define_insn "neg<mode>2"
+(define_insn "neg<mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -2923,7 +2923,7 @@
    [(set_attr "type" "neon_fp_neg_<stype><q>")]
  )
  
-(define_insn "abs<mode>2"
+(define_insn "abs<mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -3228,7 +3228,7 @@
  
  ;; Vector versions of the floating-point frint patterns.
  ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
-(define_insn "<frint_pattern><mode>2"
+(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
    [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
                        FRINT))]
@@ -3583,7 +3583,7 @@
  ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
  ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
  ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<fmaxmin><mode>3"
+(define_insn "<fmaxmin><mode>3<vczle><vczbe>"
    [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
@@ -6960,7 +6960,7 @@
      DONE;
  })
  
-(define_insn "*sqrt<mode>2"
+(define_insn "*sqrt<mode>2<vczle><vczbe>"
    [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
    "TARGET_SIMD"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c

index 96834425d51923120daa31300a6ec1cd5e619232..29a2e90e92a7aa0f6f4358b776f15602ccd031a9 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -41,6 +41,14 @@ OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
  FUNC (T, IS, OS, OP1, S)                \
  OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
  
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
  #define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
  OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
  OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
@@ -53,6 +61,8 @@ OPELEVEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, m
  OPELEVEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
  OPELEVEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
  
+OPEIGHT (float32, 2, 4, f32, add, sub, mul, div, max, maxnm, min, minnm)
+
  #define UNARY(OT,IT,OP,S)                      \
  OT                                              \
  foo_##IT##OP##_##S (IT a)                     \
@@ -71,6 +81,7 @@ OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
  OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
  OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
  
+OPTEN (float32, 2, 4, f32, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
  /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
  /* { dg-final { scan-assembler-not {\tmov\t} } }  */
  
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c

new file mode 100644 (file)

index 0000000..603c5ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
@@ -0,0 +1,72 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and that no explicit zeroing
+    instructions are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+fp16" } */
+
+#include <arm_neon.h>
+
+#define BINARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b), zeros);      \
+}
+
+#define FUNC(T,IS,OS,OP,S) BINARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3)    \
+FUNC (T, IS, OS, OP1, S)        \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4)       \
+FUNC (T, IS, OS, OP1, S)                \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+#define OPFIVE(T,IS,OS,S,OP1,OP2,OP3,OP4, OP5)  \
+FUNC (T, IS, OS, OP1, S)                \
+OPFOUR (T, IS, OS, S, OP2, OP3, OP4, OP5)
+
+#define OPSIX(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
+
+#define OPSEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
+
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
+#define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
+
+OPEIGHT (float16, 4, 8, f16, add, sub, mul, div, max, maxnm, min, minnm)
+
+#define UNARY(OT,IT,OP,S)                      \
+OT                                              \
+foo_##IT##OP##_##S (IT a)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S ((IT) v##OP##_##S (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTEN (float16, 4, 8, f16, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 3 May 2023 10:15:34 +0000 (11:15 +0100)
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 3 May 2023 10:15:34 +0000 (11:15 +0100)
gcc/config/aarch64/aarch64-simd.md		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c	[new file with mode: 0644]	patch | blob