From: Juzhe-Zhong Date: Wed, 28 Jun 2023 04:15:12 +0000 (+0800) Subject: RISC-V: Support vfwmul.vv combine lowering X-Git-Tag: basepoints/gcc-15~7878 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bc32918b063b9fa3dffc8815478a81df6ad999ca;p=thirdparty%2Fgcc.git RISC-V: Support vfwmul.vv combine lowering Consider the following complicate case: __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \ TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \ TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \ TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \ { \ for (int i = 0; i < n; i++) \ { \ dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \ dst2[i] = (TYPE1) a2[i] * (TYPE1) b[i]; \ dst3[i] = (TYPE1) a2[i] * (TYPE1) a[i]; \ dst4[i] = (TYPE1) a[i] * (TYPE1) b2[i]; \ } \ } TEST_TYPE (double, float) Such complicate situation, Combine PASS can not combine extension of both operands on the fly. So the combine PASS will first try to combine one of the combine extension, and then combine the other. The combine flow is as follows: Original IR: (set (reg 0) (float_extend: (reg 1)) (set (reg 3) (float_extend: (reg 2)) (set (reg 4) (mult: (reg 0) (reg 3)) First step of combine: (set (reg 3) (float_extend: (reg 2)) (set (reg 4) (mult: (float_extend: (reg 1) (reg 3)) Second step of combine: (set (reg 4) (mult: (float_extend: (reg 1) (float_extend: (reg 2)) So, to enhance the combine optimization, we add a "pseudo vwfmul.wv" RTL pattern in autovec-opt.md which is (set (reg 0) (mult (float_extend (reg 1) (reg 2)))). gcc/ChangeLog: * config/riscv/autovec-opt.md (@pred_single_widen_mul): Change "@" into "*" in pattern name which simplifies build files. (*pred_single_widen_mul): Ditto. (*pred_single_widen_mul): New pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/widen/widen-3.c: Add floating-point. * gcc.target/riscv/rvv/autovec/widen/widen-7.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run-7.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c: New test. --- diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 80b85fa2de88..a362812bf05a 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -406,6 +406,45 @@ [(set_attr "type" "vimovvx") (set_attr "mode" "")]) +;; We don't have vfwmul.wv instruction like vfwadd.wv in RVV. +;; This pattern is an intermediate RTL IR as a pseudo vfwmul.wv to enhance +;; optimization of instructions combine. +(define_insn_and_split "*pred_single_widen_mul" + [(set (match_operand:VWEXTF 0 "register_operand" "=&vr, &vr") + (if_then_else:VWEXTF + (unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (mult:VWEXTF + (float_extend:VWEXTF + (match_operand: 4 "register_operand" " vr, vr")) + (match_operand:VWEXTF 3 "register_operand" " vr, vr")) + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_extend (mode); + rtx tmp = gen_reg_rtx (mode); + rtx ops[] = {tmp, operands[4]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops); + + emit_insn (gen_pred (MULT, mode, operands[0], operands[1], operands[2], + operands[3], tmp, operands[5], operands[6], + operands[7], operands[8], operands[9])); + DONE; + } + [(set_attr "type" "vfwmul") + (set_attr "mode" "")]) + ;; ------------------------------------------------------------------------- ;; ---- [FP] VFWMACC ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c index 609a5c09f70d..b2b144059022 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */ #include @@ -19,9 +19,12 @@ TEST_TYPE (int32_t, int16_t) \ TEST_TYPE (uint32_t, uint16_t) \ TEST_TYPE (int64_t, int32_t) \ - TEST_TYPE (uint64_t, uint32_t) + TEST_TYPE (uint64_t, uint32_t) \ + TEST_TYPE (float, _Float16) \ + TEST_TYPE (double, float) TEST_ALL () /* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */ /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */ +/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c index cc43d9ba3fe7..3806e8b98ee6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */ #include @@ -19,9 +19,12 @@ TEST_TYPE (int32_t, int16_t) \ TEST_TYPE (uint32_t, uint16_t) \ TEST_TYPE (int64_t, int32_t) \ - TEST_TYPE (uint64_t, uint32_t) + TEST_TYPE (uint64_t, uint32_t) \ + TEST_TYPE (float, _Float16) \ + TEST_TYPE (double, float) TEST_ALL () /* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */ /* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */ +/* { dg-final { scan-assembler-times {\tvfwcvt} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c index e1fd79430c38..1515374890d6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */ #include @@ -24,9 +24,12 @@ TEST_TYPE (int32_t, int16_t) \ TEST_TYPE (uint32_t, uint16_t) \ TEST_TYPE (int64_t, int32_t) \ - TEST_TYPE (uint64_t, uint32_t) + TEST_TYPE (uint64_t, uint32_t) \ + TEST_TYPE (float, _Float16) \ + TEST_TYPE (double, float) TEST_ALL () /* { dg-final { scan-assembler-times {\tvwmul\.vv} 12 } } */ /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 12 } } */ +/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c index beb0cc2b58b1..b7dd60fa8e86 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c @@ -1,5 +1,5 @@ /* { dg-do run { target { riscv_vector } } } */ -/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ #include #include "widen-3.c" @@ -25,7 +25,8 @@ RUN (int32_t, int16_t, -32768) \ RUN (uint32_t, uint16_t, 65535) \ RUN (int64_t, int32_t, -2147483648) \ - RUN (uint64_t, uint32_t, 4294967295) + RUN (uint64_t, uint32_t, 4294967295) \ + RUN (double, float, -2147483648) int main () diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c index 4abddd5d7183..ab29f4a0f709 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c @@ -1,5 +1,5 @@ /* { dg-do run { target { riscv_vector } } } */ -/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ #include #include "widen-7.c" @@ -25,7 +25,8 @@ RUN (int32_t, int16_t, -32768) \ RUN (uint32_t, uint16_t, 65535) \ RUN (int64_t, int32_t, -2147483648) \ - RUN (uint64_t, uint32_t, 4294967295) + RUN (uint64_t, uint32_t, 4294967295) \ + RUN (double, float, -2147483648) int main () diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c new file mode 100644 index 000000000000..c3efd0b97bfc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ + +#include +#include "widen-3.c" + +#define SZ 512 + +#define RUN(TYPE1, TYPE2, LIMIT) \ + TYPE2 a##TYPE2[SZ]; \ + TYPE2 b##TYPE2[SZ]; \ + TYPE1 dst##TYPE1[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE2[i] = LIMIT + i % 8723; \ + b##TYPE2[i] = LIMIT + i & 1964; \ + } \ + vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i])); + +#define RUN_ALL() RUN (float, _Float16, -32768) + +int +main () +{ + RUN_ALL () +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c new file mode 100644 index 000000000000..60e2401c0885 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ + +#include +#include "widen-7.c" + +#define SZ 512 + +#define RUN(TYPE1, TYPE2, LIMIT) \ + TYPE2 a##TYPE2[SZ]; \ + TYPE1 b##TYPE1[SZ]; \ + TYPE1 dst##TYPE1[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE2[i] = LIMIT + i % LIMIT; \ + b##TYPE1[i] = LIMIT + i & LIMIT; \ + } \ + vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE1, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (dst##TYPE1[i] == (((TYPE1) a##TYPE2[i]) * b##TYPE1[i])); + +#define RUN_ALL() RUN (float, _Float16, -32768) + +int +main () +{ + RUN_ALL () +}