]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
arm: Auto-vectorization for MVE: add __fp16 support to VCMP
authorChristophe Lyon <christophe.lyon@linaro.org>
Mon, 17 May 2021 12:31:58 +0000 (12:31 +0000)
committerChristophe Lyon <christophe.lyon@linaro.org>
Mon, 17 May 2021 12:31:58 +0000 (12:31 +0000)
This patch adds __fp16 support to the previous patch that added vcmp
support with MVE. For this we update existing expanders to use VDQWH
iterator, and add a new expander vcond<VH_cvtto><mode>.  In the
process we need to create suitable iterators, and update v_cmp_result
as needed.

2021-05-17  Christophe Lyon  <christophe.lyon@linaro.org>

gcc/
* config/arm/iterators.md (V16): New iterator.
(VH_cvtto): New iterator.
(v_cmp_result): Added V4HF and V8HF support.
* config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>): Use VDQWH.
(vcond<mode><mode>): Likewise.
(vcond_mask_<mode><v_cmp_result>): Likewise.
(vcond<VH_cvtto><mode>): New expander.

gcc/testsuite/
* gcc.target/arm/simd/mve-compare-3.c: New test with GCC vectors.
* gcc.target/arm/simd/mve-vcmp-f16.c: New test for
auto-vectorization.
* gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust since we now
vectorize float16_t vectors.

gcc/config/arm/iterators.md
gcc/config/arm/vec-common.md
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c [new file with mode: 0644]

index a128465feeabd0b05ef65021b20f093f2185b299..3042bafc6c6ee2e40311e46d8f345e18c1072220 100644 (file)
 ;; Vector modes for 16-bit floating-point support.
 (define_mode_iterator VH [V8HF V4HF])
 
+;; Modes with 16-bit elements only.
+(define_mode_iterator V16 [V4HI V4HF V8HI V8HF])
+
 ;; 16-bit floating-point vector modes suitable for moving (includes BFmode).
 (define_mode_iterator VHFBF [V8HF V4HF V4BF V8BF])
 
 ;; (Opposite) mode to convert to/from for vector-half mode conversions.
 (define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI")
                            (V8HI "V8HF") (V8HF "V8HI")])
+(define_mode_attr VH_cvtto [(V4HI "v4hf") (V4HF "v4hi")
+                           (V8HI "v8hf") (V8HF "v8hi")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
 (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
                                (V4HI "v4hi") (V8HI  "v8hi")
                                (V2SI "v2si") (V4SI  "v4si")
+                               (V4HF "v4hi") (V8HF  "v8hi")
                                (DI   "di")   (V2DI  "v2di")
                                (V2SF "v2si") (V4SF  "v4si")])
 
index 448731f7be9f74f900e6fbf8bdd229f19a1e439b..265fa40e74734c98fb70ec75de7e707c33cab3ef 100644 (file)
 (define_expand "vec_cmp<mode><v_cmp_result>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (match_operator:<V_cmp_result> 1 "comparison_operator"
-         [(match_operand:VDQW 2 "s_register_operand")
-          (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+         [(match_operand:VDQWH 2 "s_register_operand")
+          (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
   "ARM_HAVE_<MODE>_ARITH
    && !TARGET_REALLY_IWMMXT
    && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 ;; element-wise.
 
 (define_expand "vcond<mode><mode>"
-  [(set (match_operand:VDQW 0 "s_register_operand")
-       (if_then_else:VDQW
+  [(set (match_operand:VDQWH 0 "s_register_operand")
+       (if_then_else:VDQWH
          (match_operator 3 "comparison_operator"
-           [(match_operand:VDQW 4 "s_register_operand")
-            (match_operand:VDQW 5 "reg_or_zero_operand")])
-         (match_operand:VDQW 1 "s_register_operand")
-         (match_operand:VDQW 2 "s_register_operand")))]
+           [(match_operand:VDQWH 4 "s_register_operand")
+            (match_operand:VDQWH 5 "reg_or_zero_operand")])
+         (match_operand:VDQWH 1 "s_register_operand")
+         (match_operand:VDQWH 2 "s_register_operand")))]
   "ARM_HAVE_<MODE>_ARITH
    && !TARGET_REALLY_IWMMXT
    && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
   DONE;
 })
 
+(define_expand "vcond<VH_cvtto><mode>"
+  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand")
+       (if_then_else:<VH_CVTTO>
+         (match_operator 3 "comparison_operator"
+           [(match_operand:V16 4 "s_register_operand")
+            (match_operand:V16 5 "reg_or_zero_operand")])
+         (match_operand:<VH_CVTTO> 1 "s_register_operand")
+         (match_operand:<VH_CVTTO> 2 "s_register_operand")))]
+  "ARM_HAVE_<MODE>_ARITH
+   && !TARGET_REALLY_IWMMXT
+   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vcond (operands, <V_cmp_result>mode);
+  DONE;
+})
+
 (define_expand "vcondu<mode><v_cmp_result>"
   [(set (match_operand:VDQW 0 "s_register_operand")
        (if_then_else:VDQW
 })
 
 (define_expand "vcond_mask_<mode><v_cmp_result>"
-  [(set (match_operand:VDQW 0 "s_register_operand")
-        (if_then_else:VDQW
+  [(set (match_operand:VDQWH 0 "s_register_operand")
+        (if_then_else:VDQWH
           (match_operand:<V_cmp_result> 3 "s_register_operand")
-          (match_operand:VDQW 1 "s_register_operand")
-          (match_operand:VDQW 2 "s_register_operand")))]
+          (match_operand:VDQWH 1 "s_register_operand")
+          (match_operand:VDQWH 2 "s_register_operand")))]
   "ARM_HAVE_<MODE>_ARITH
    && !TARGET_REALLY_IWMMXT
    && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
index 921d26e2ed4637103160e78bcaefa681d42e1e0a..52b87376dc782c9e6ef7e8427c782baa1813dc7f 100644 (file)
@@ -104,8 +104,20 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
 /* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } }  */
 
 /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } }  */
-/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } }  */
-/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } }  */
+
+/* For float16_t.  */
+/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } }  */
+
+/* For float16x4_t.  */
+/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+} 2 } }  */
+
+/* For float16x8_t.  */
+/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+} 2 } }  */
 
 /* { dg-final { scan-assembler-not {vadd\.f32} } }  */
 /* { dg-final { scan-assembler-not {vsub\.f32} } }  */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c
new file mode 100644 (file)
index 0000000..76f81e8
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+
+/* float 16 tests.  */
+
+#ifndef ELEM_TYPE
+#define ELEM_TYPE __fp16
+#endif
+#ifndef INT_ELEM_TYPE
+#define INT_ELEM_TYPE __INT16_TYPE__
+#endif
+
+#define COMPARE(NAME, OP)                      \
+  int_vec                                      \
+  cmp_##NAME##_reg (vec a, vec b)              \
+  {                                            \
+    return a OP b;                             \
+  }
+
+typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
+typedef ELEM_TYPE vec __attribute__((vector_size(16)));
+
+COMPARE (eq, ==)
+COMPARE (ne, !=)
+COMPARE (lt, <)
+COMPARE (le, <=)
+COMPARE (gt, >)
+COMPARE (ge, >=)
+
+/* eq, ne, lt, le, gt, ge.
+/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c
new file mode 100644 (file)
index 0000000..dbae2d1
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+
+#include <stdint.h>
+
+#define NB 8
+
+#define FUNC(OP, NAME)                                                 \
+  void test_ ## NAME ##_f (__fp16 * __restrict__ dest, __fp16 *a, __fp16 *b) { \
+    int i;                                                             \
+    for (i=0; i<NB; i++) {                                             \
+      dest[i] = a[i] OP b[i];                                          \
+    }                                                                  \
+  }
+
+FUNC(==, vcmpeq)
+FUNC(!=, vcmpne)
+FUNC(<, vcmplt)
+FUNC(<=, vcmple)
+FUNC(>, vcmpgt)
+FUNC(>=, vcmpge)
+
+/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */