From dd305514bbca46a39d020018e1bef0cfa15c99c8 Mon Sep 17 00:00:00 2001 From: Pan Li Date: Wed, 15 Oct 2025 22:16:11 +0800 Subject: [PATCH] RISC-V: Combine vsext.vf2 and vsll.vi to vwsll.vi on ZVBB The vwsll.vi of the zvbb ext takes zero extend before ashift. But we can still do some combine based on sign extend if and only if the shift is imm and the sign extend bits are all shifted out. For example as below vsetvli zero, zero, e32, m1, ta, ma vsext.vf2 v1, v2 vsll.vi v1, v1, 16 If the ashift bits are greater than or equal to the truncated bitsize (aka 16 for e32), the sign or zero extend bits will be shifted out and never pollute the final result. Then we have vsetvli zero, zero, e32, m1, ta, ma vwsll.vi v1, v2, 16 PR target/121959 The below test suites are passed for this patch series. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/autovec-opt.md (*vwsll_sign_extend_<mode>): Add pattern to combine vsext.vf2 and vsll.vi to vwsll.vi. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr121959-1.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-2.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-3.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-4.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-5.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-run-1.c: New test. * gcc.target/riscv/rvv/autovec/pr121959.h: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/autovec-opt.md | 41 ++++++++++++ .../gcc.target/riscv/rvv/autovec/pr121959-1.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-2.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-3.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-4.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-5.c | 9 +++ .../riscv/rvv/autovec/pr121959-run-1.c | 65 +++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/pr121959.h | 24 +++++++ 8 files changed, 175 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 063c9a0122b..52ab79c555a 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -2424,3 +2424,44 @@ } [(set_attr "type" "vfalu")] ) + +;; Combine vsext.vf2 + vsll.vi to vwsll.vi, which depends on ZVBB. +;; The vwsll.vi zero extends, thus the combine is only valid when the +;; shift amount is equal to or greater than the double truncated bits. +;; Appears in the satd function of x264. 
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (ashift:VWEXTI
+      (sign_extend:VWEXTI
+        (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+      (match_operand 2 "const_int_operand")))]
+  "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    int imm = INTVAL (operands[2]);
+    int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+    if (imm >= trunc_prec) /* Extended bits are all shifted out.  */
+      {
+        insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+        emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+      }
+    else /* Extended bits survive, keep sign extend + shift.  */
+      {
+        insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+        rtx extend = gen_reg_rtx (<MODE>mode);
+        rtx unary_ops[] = {extend, operands[1]};
+        riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+                                       unary_ops);
+
+        icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+        rtx binary_ops[] = {operands[0], extend, operands[2]};
+        riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                       binary_ops);
+      }
+
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
new file mode 100644
index 00000000000..a42d7c4de60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
new file mode 100644
index 00000000000..2a3ef8d2617
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times
{vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
new file mode 100644
index 00000000000..59a930a1efa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
new file mode 100644
index 00000000000..59a6d365af4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
new file mode 100644
index 00000000000..a9319a3a959
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
new file mode 100644
index 00000000000..77fd95b8ebb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* {
dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "pr121959.h"
+
+#define WT int32_t
+#define NT uint8_t
+#define IMM 16
+#define N 16
+
+DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM)
+
+NT g_data[][2][N] = {
+  {
+    /* a */
+    {
+      2, 2, 2, 2,
+      255, 255, 255, 255,
+      128, 128, 128, 128,
+      127, 127, 127, 127,
+    },
+    /* b */
+    {
+      1, 1, 1, 1,
+      0, 0, 0, 0,
+      2, 2, 2, 2,
+      7, 7, 7, 7,
+    },
+  },
+};
+
+WT g_expect[][N] = {
+  /* 0: (a[i] - b[i]) << IMM */
+  {
+    65536, 65536, 65536, 65536,
+    16711680, 16711680, 16711680, 16711680,
+    8257536, 8257536, 8257536, 8257536,
+    7864320, 7864320, 7864320, 7864320,
+  },
+};
+
+int
+main ()
+{
+  unsigned i, k;
+  WT out[N];
+
+  for (i = 0; i < sizeof (g_data) / sizeof (g_data[0]); i++)
+    {
+      NT *a = g_data[i][0];
+      NT *b = g_data[i][1];
+      WT *expect = g_expect[i];
+
+      RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N);
+
+      for (k = 0; k < N; k++)
+        if (out[k] != expect[k])
+          __builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
new file mode 100644
index 00000000000..10b1b623979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
@@ -0,0 +1,24 @@
+#ifndef HAVE_DEFINED_PR121959_H
+#define HAVE_DEFINED_PR121959_H
+
+#include <stdint.h>
+
+#define DEF_VWSLL_FUNC_0(WT, NT, IMM)                     \
+void                                                      \
+test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res,   \
+                                     NT * restrict a,     \
+                                     NT * restrict b,     \
+                                     int n)               \
+{                                                         \
+  for (int i = 0; i < n; i++)                             \
+    {                                                     \
+      res[i] = (a[i] - b[i]) << IMM;                      \
+    }                                                     \
+}
+#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM)
+#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \
+  test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n)
+#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) \
+  RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n)
+
+#endif
-- 
2.47.3