RISC-V: Support {U}INT64 to FP16 auto-vectorization

author Pan Li <pan2.li@intel.com>

Thu, 28 Sep 2023 05:51:07 +0000 (13:51 +0800)

committer Pan Li <pan2.li@intel.com>

Thu, 28 Sep 2023 14:22:30 +0000 (22:22 +0800)
author Pan Li <pan2.li@intel.com>
Thu, 28 Sep 2023 05:51:07 +0000 (13:51 +0800)
committer Pan Li <pan2.li@intel.com>
Thu, 28 Sep 2023 14:22:30 +0000 (22:22 +0800)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md

index cd0cbdd288961784f454ba29b172191748ac9069..d6cf376ebca51c22fdc43ab39d72c1f0b4cd1cc3 100644 (file)
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -974,6 +974,30 @@
  }
  [(set_attr "type" "vfncvtitof")])
  
+;; This operation can be performed in the loop vectorizer but unfortunately
+;; not applicable for now. We can remove this pattern after loop vectorizer
+;; is able to take care of INT64 to FP16 conversion.
+(define_insn_and_split "<float_cvt><mode><vnnconvert>2"
+  [(set (match_operand:<VNNCONVERT>  0 "register_operand")
+       (any_float:<VNNCONVERT>
+         (match_operand:VWWCONVERTI 1 "register_operand")))]
+  "TARGET_VECTOR && TARGET_ZVFH && can_create_pseudo_p () && !flag_trapping_math"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx single = gen_reg_rtx (<VNCONVERT>mode); /* Get vector SF mode.  */
+
+    /* Step-1, INT64 => FP32.  */
+    emit_insn (gen_<float_cvt><mode><vnconvert>2 (single, operands[1]));
+    /* Step-2, FP32 => FP16.  */
+    emit_insn (gen_trunc<vnconvert><vnnconvert>2 (operands[0], single));
+
+    DONE;
+  }
+  [(set_attr "type" "vfncvtitof")]
+)
+
  ;; =========================================================================
  ;; == Unary arithmetic
  ;; =========================================================================
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md

index b6cd872eb42236934330118880f9331f166ad552..c9a7344b1bc5d5936419661db7f6630940bb4677 100644 (file)
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -1247,6 +1247,24 @@
    (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
  ])
  
+(define_mode_iterator VWWCONVERTI [
+  (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (RVVM2DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+
+  (V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+  (V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 64")
+  (V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
+  (V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 256")
+  (V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 512")
+  (V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024")
+  (V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048")
+  (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096")
+])
+
  (define_mode_iterator VQEXTI [
    RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
  
@@ -3243,6 +3261,26 @@
    (V512DF "v512si")
  ])
  
+;; NN indicates narrow twice
+(define_mode_attr VNNCONVERT [
+  (RVVM8DI "RVVM2HF") (RVVM4DI "RVVM1HF") (RVVM2DI "RVVMF2HF")
+  (RVVM1DI "RVVMF4HF")
+
+  (V1DI "V1HF") (V2DI "V2HF") (V4DI "V4HF") (V8DI "V8HF") (V16DI "V16HF")
+  (V32DI "V32HF") (V64DI "V64HF") (V128DI "V128HF") (V256DI "V256HF")
+  (V512DI "V512HF")
+])
+
+;; nn indicates narrow twice
+(define_mode_attr vnnconvert [
+  (RVVM8DI "rvvm2hf")  (RVVM4DI "rvvm1hf") (RVVM2DI "rvvmf2hf")
+  (RVVM1DI "rvvmf4hf")
+
+  (V1DI "v1hf") (V2DI "v2hf") (V4DI "v4hf") (V8DI "v8hf") (V16DI "v16hf")
+  (V32DI "v32hf") (V64DI "v64hf") (V128DI "v128hf") (V256DI "v256hf")
+  (V512DI "v512hf")
+])
+
  (define_mode_attr VDEMOTE [
    (RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI")
    (V1DI "V1SI")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c

new file mode 100644 (file)

index 0000000..f08c121
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint.h>
+
+/*
+** test_int65_to_fp16:
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma
+**   vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+
+**   vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma
+**   vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+
+**   ...
+*/
+void
+test_int65_to_fp16 (int64_t * __restrict a, _Float16 *b, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    b[i] = (_Float16) (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c

new file mode 100644 (file)

index 0000000..2d8ba8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint.h>
+
+/*
+** test_uint65_to_fp16:
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma
+**   vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+
+**   vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma
+**   vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+
+**   ...
+*/
+void
+test_uint65_to_fp16 (uint64_t * __restrict a, _Float16 *b, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    b[i] = (_Float16) (a[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c

new file mode 100644 (file)

index 0000000..5637b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -ffast-math --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_CONVERT (fp16, int64_t, _Float16, 1)
+DEF_CONVERT (fp16, int64_t, _Float16, 2)
+DEF_CONVERT (fp16, int64_t, _Float16, 4)
+DEF_CONVERT (fp16, int64_t, _Float16, 8)
+DEF_CONVERT (fp16, int64_t, _Float16, 16)
+DEF_CONVERT (fp16, int64_t, _Float16, 32)
+DEF_CONVERT (fp16, int64_t, _Float16, 64)
+DEF_CONVERT (fp16, int64_t, _Float16, 128)
+DEF_CONVERT (fp16, int64_t, _Float16, 256)
+DEF_CONVERT (fp16, int64_t, _Float16, 512)
+DEF_CONVERT (fp16, int64_t, _Float16, 1024)
+DEF_CONVERT (fp16, int64_t, _Float16, 2048)
+
+DEF_CONVERT (fp16, uint64_t, _Float16, 1)
+DEF_CONVERT (fp16, uint64_t, _Float16, 2)
+DEF_CONVERT (fp16, uint64_t, _Float16, 4)
+DEF_CONVERT (fp16, uint64_t, _Float16, 8)
+DEF_CONVERT (fp16, uint64_t, _Float16, 16)
+DEF_CONVERT (fp16, uint64_t, _Float16, 32)
+DEF_CONVERT (fp16, uint64_t, _Float16, 64)
+DEF_CONVERT (fp16, uint64_t, _Float16, 128)
+DEF_CONVERT (fp16, uint64_t, _Float16, 256)
+DEF_CONVERT (fp16, uint64_t, _Float16, 512)
+DEF_CONVERT (fp16, uint64_t, _Float16, 1024)
+DEF_CONVERT (fp16, uint64_t, _Float16, 2048)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 30 } } */
author	Pan Li <pan2.li@intel.com>
	Thu, 28 Sep 2023 05:51:07 +0000 (13:51 +0800)
committer	Pan Li <pan2.li@intel.com>
	Thu, 28 Sep 2023 14:22:30 +0000 (22:22 +0800)
gcc/config/riscv/autovec.md		patch \| blob \| blame \| history
gcc/config/riscv/vector-iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c	[new file with mode: 0644]	patch \| blob