RISC-V: Support RVV FP16 ZVFH Reduction floating-point intrinsic API

author Pan Li <pan2.li@intel.com>

Tue, 6 Jun 2023 02:07:37 +0000 (10:07 +0800)

committer Pan Li <pan2.li@intel.com>

Tue, 6 Jun 2023 02:11:38 +0000 (10:11 +0800)
author Pan Li <pan2.li@intel.com>
Tue, 6 Jun 2023 02:07:37 +0000 (10:07 +0800)
committer Pan Li <pan2.li@intel.com>
Tue, 6 Jun 2023 02:11:38 +0000 (10:11 +0800)
diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def

index 1e2491de6d618098449a1ef7f3af8744fa8e7996..bd3deae8340a8eb156a642050b5d093efd599bfe 100644 (file)
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -634,6 +634,13 @@ DEF_RVV_WU_OPS (vuint32m2_t, 0)
  DEF_RVV_WU_OPS (vuint32m4_t, 0)
  DEF_RVV_WU_OPS (vuint32m8_t, 0)
  
+DEF_RVV_WF_OPS (vfloat16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WF_OPS (vfloat16mf2_t, TARGET_ZVFH)
+DEF_RVV_WF_OPS (vfloat16m1_t, TARGET_ZVFH)
+DEF_RVV_WF_OPS (vfloat16m2_t, TARGET_ZVFH)
+DEF_RVV_WF_OPS (vfloat16m4_t, TARGET_ZVFH)
+DEF_RVV_WF_OPS (vfloat16m8_t, TARGET_ZVFH)
+
  DEF_RVV_WF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64)
  DEF_RVV_WF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
  DEF_RVV_WF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md

index 1ea4fcf68cd0179288ca3fd222a069d45e7c9b4b..f4946d84449cf41b4fecaefdd095d5e1b0225b93 100644 (file)
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -330,10 +330,18 @@
  ])
  
  (define_mode_iterator VWF [
+  (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+  (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+  (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
    (VNx1SF "TARGET_MIN_VLEN < 128") VNx2SF VNx4SF VNx8SF (VNx16SF "TARGET_MIN_VLEN > 32") (VNx32SF "TARGET_MIN_VLEN >= 128")
  ])
  
  (define_mode_iterator VWF_ZVE64 [
+  VNx1HF VNx2HF VNx4HF VNx8HF VNx16HF VNx32HF
    VNx1SF VNx2SF VNx4SF VNx8SF VNx16SF
  ])
  
@@ -1322,6 +1330,7 @@
    (VNx8HI "VNx4SI") (VNx16HI "VNx4SI") (VNx32HI "VNx4SI") (VNx64HI "VNx4SI")
    (VNx1SI "VNx2DI") (VNx2SI "VNx2DI") (VNx4SI "VNx2DI")
    (VNx8SI "VNx2DI") (VNx16SI "VNx2DI") (VNx32SI "VNx2DI")
+  (VNx1HF "VNx4SF") (VNx2HF "VNx4SF") (VNx4HF "VNx4SF") (VNx8HF "VNx4SF") (VNx16HF "VNx4SF") (VNx32HF "VNx4SF") (VNx64HF "VNx4SF")
    (VNx1SF "VNx2DF") (VNx2SF "VNx2DF")
    (VNx4SF "VNx2DF") (VNx8SF "VNx2DF") (VNx16SF "VNx2DF") (VNx32SF "VNx2DF")
  ])
@@ -1333,6 +1342,7 @@
    (VNx8HI "VNx2SI") (VNx16HI "VNx2SI") (VNx32HI "VNx2SI")
    (VNx1SI "VNx1DI") (VNx2SI "VNx1DI") (VNx4SI "VNx1DI")
    (VNx8SI "VNx1DI") (VNx16SI "VNx1DI")
+  (VNx1HF "VNx2SF") (VNx2HF "VNx2SF") (VNx4HF "VNx2SF") (VNx8HF "VNx2SF") (VNx16HF "VNx2SF") (VNx32HF "VNx2SF")
    (VNx1SF "VNx1DF") (VNx2SF "VNx1DF")
    (VNx4SF "VNx1DF") (VNx8SF "VNx1DF") (VNx16SF "VNx1DF")
  ])
@@ -1393,6 +1403,7 @@
    (VNx8HI "vnx4si") (VNx16HI "vnx4si") (VNx32HI "vnx4si") (VNx64HI "vnx4si")
    (VNx1SI "vnx2di") (VNx2SI "vnx2di") (VNx4SI "vnx2di")
    (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di")
+  (VNx1HF "vnx4sf") (VNx2HF "vnx4sf") (VNx4HF "vnx4sf") (VNx8HF "vnx4sf") (VNx16HF "vnx4sf") (VNx32HF "vnx4sf") (VNx64HF "vnx4sf")
    (VNx1SF "vnx2df") (VNx2SF "vnx2df")
    (VNx4SF "vnx2df") (VNx8SF "vnx2df") (VNx16SF "vnx2df") (VNx32SF "vnx2df")
  ])
@@ -1404,6 +1415,7 @@
    (VNx8HI "vnx2si") (VNx16HI "vnx2si") (VNx32HI "vnx2si")
    (VNx1SI "vnx1di") (VNx2SI "vnx1di") (VNx4SI "vnx1di")
    (VNx8SI "vnx1di") (VNx16SI "vnx1di")
+  (VNx1HF "vnx2sf") (VNx2HF "vnx2sf") (VNx4HF "vnx2sf") (VNx8HF "vnx2sf") (VNx16HF "vnx2sf") (VNx32HF "vnx2sf")
    (VNx1SF "vnx1df") (VNx2SF "vnx1df")
    (VNx4SF "vnx1df") (VNx8SF "vnx1df") (VNx16SF "vnx1df")
  ])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c

index 0d244aac9ec064e001c34653e501ed993d7c17b0..2e86d1faaf17339a04f7c8c8cc60a934490b30e3 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c
@@ -365,9 +365,57 @@ vfloat16m4_t test_vfncvt_f_xu_w_f16m4(vuint32m8_t src, size_t vl) {
    return __riscv_vfncvt_f_xu_w_f16m4(src, vl);
  }
  
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 43 } } */
+vfloat16m1_t test_vfredosum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredosum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16m8_f16m1(vfloat16m8_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredosum_vs_f16m8_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredusum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16m8_f16m1(vfloat16m8_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredusum_vs_f16m8_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmax_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredmax_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmax_vs_f16m8_f16m1(vfloat16m8_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredmax_vs_f16m8_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredmin_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16m8_f16m1(vfloat16m8_t vector, vfloat16m1_t scalar, size_t vl) {
+  return __riscv_vfredmin_vs_f16m8_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfwredosum_vs_f16mf4_f32m1(vfloat16mf4_t vector, vfloat32m1_t scalar, size_t vl) {
+  return __riscv_vfwredosum_vs_f16mf4_f32m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfwredosum_vs_f16m8_f32m1(vfloat16m8_t vector, vfloat32m1_t scalar, size_t vl) {
+  return __riscv_vfwredosum_vs_f16m8_f32m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfwredusum_vs_f16mf4_f32m1(vfloat16mf4_t vector, vfloat32m1_t scalar, size_t vl) {
+  return __riscv_vfwredusum_vs_f16mf4_f32m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfwredusum_vs_f16m8_f32m1(vfloat16m8_t vector, vfloat32m1_t scalar, size_t vl) {
+  return __riscv_vfwredusum_vs_f16m8_f32m1(vector, scalar, vl);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 49 } } */
  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]} 11 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]} 34 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]} 40 } } */
  /* { dg-final { scan-assembler-times {vfadd\.v[fv]\s+v[0-9]+,\s*v[0-9]+,\s*[vfa]+[0-9]+} 2 } } */
  /* { dg-final { scan-assembler-times {vfsub\.v[fv]\s+v[0-9]+,\s*v[0-9]+,\s*[vfa]+[0-9]+} 2 } } */
  /* { dg-final { scan-assembler-times {vfrsub\.vf\s+v[0-9]+,\s*v[0-9]+,\s*[vfa]+[0-9]+} 2 } } */
@@ -416,3 +464,9 @@ vfloat16m4_t test_vfncvt_f_xu_w_f16m4(vuint32m8_t src, size_t vl) {
  /* { dg-final { scan-assembler-times {vfwcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} 1 } } */
  /* { dg-final { scan-assembler-times {vfncvt\.x\.f\.w\s+v[0-9]+,\s*v[0-9]+} 1 } } */
  /* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfwredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfwredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
author	Pan Li <pan2.li@intel.com>
	Tue, 6 Jun 2023 02:07:37 +0000 (10:07 +0800)
committer	Pan Li <pan2.li@intel.com>
	Tue, 6 Jun 2023 02:11:38 +0000 (10:11 +0800)
gcc/config/riscv/riscv-vector-builtins-types.def		patch \| blob \| blame \| history
gcc/config/riscv/vector-iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c		patch \| blob \| blame \| history