]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Combine vsext.vf2 and vsll.vi to vwsll.vi on ZVBB
author Pan Li <pan2.li@intel.com>
Wed, 15 Oct 2025 14:16:11 +0000 (22:16 +0800)
committer Pan Li <pan2.li@intel.com>
Fri, 7 Nov 2025 12:23:45 +0000 (20:23 +0800)
The vwsll.vi instruction of the ZVBB extension zero-extends its
source before the shift.  But we can still combine a sign extend
followed by a shift if and only if the shift amount is an immediate
and all of the sign-extension bits are shifted out.
For example as below

  vsetvli   zero, zero, e32, m1, ta, ma
  vsext.vf2 v1, v2
  vsll.vi   v1, v1, 16

If the shift amount is greater than or equal to the truncated bit size
(i.e. 16 for e32), the sign- or zero-extension bits are all shifted out
and never pollute the final result.  Then we have

  vsetvli   zero, zero, e32, m1, ta, ma
  vwsll.vi  v1, v2, 16

PR target/121959

The below test suites are passed for this patch series.
 * The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*vwsll_sign_extend_<mode>): Add
pattern to combine vsext.vf2 and vsll.vi to vwsll.vi.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr121959-1.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959-3.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959-4.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959-5.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/pr121959.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
gcc/config/riscv/autovec-opt.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h [new file with mode: 0644]

index 063c9a0122b61df4f12107110c8f82bc9f1938b5..52ab79c555a6a5549fdcc9f35d701fe82fcad54d 100644 (file)
   }
   [(set_attr "type" "vfalu")]
 )
+
+;; Combine vsext.vf2 + vsll.vi into vwsll.vi when ZVBB is available.
+;; vwsll.vi zero-extends its source, so the combine is only valid when
+;; the immediate shift amount is greater than or equal to the element
+;; precision of the double-truncated (narrow) mode: every extension bit
+;; is then shifted out, and sign and zero extension give the same result.
+;; For smaller shift amounts the split re-emits the sign extend followed
+;; by the shift.
+;; Appears in the satd function of x264.
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+  [(set (match_operand:VWEXTI               0 "register_operand")
+       (ashift:VWEXTI
+         (sign_extend:VWEXTI
+           (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+         (match_operand                    2 "const_int_operand")))]
+  "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    int imm = INTVAL (operands[2]);
+    int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+    if (imm >= trunc_prec)
+      {
+       /* All extension bits are shifted out, so the zero-extending
+          vwsll yields the same value as sign extend + shift.  */
+       insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                      operands);
+      }
+    else
+      {
+       /* Some sign bits would survive the shift; keep the original
+          two-instruction sequence (sign extend, then shift).  */
+       insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+       rtx extend = gen_reg_rtx (<MODE>mode);
+       rtx unary_ops[] = {extend, operands[1]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+                                      unary_ops);
+
+       icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+       rtx binary_ops[] = {operands[0], extend, operands[2]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                      binary_ops);
+      }
+
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
new file mode 100644 (file)
index 0000000..a42d7c4
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) /* shift 16 with zvbb: vwsll.vi expected, no vsll.vi */
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
new file mode 100644 (file)
index 0000000..2a3ef8d
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) /* shift 16 without zvbb: vsll.vi expected, no vwsll.vi */
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
new file mode 100644 (file)
index 0000000..59a930a
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) /* shift 17 with zvbb: vwsll.vi expected, no vsll.vi */
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
new file mode 100644 (file)
index 0000000..59a6d36
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) /* shift 17 without zvbb: vsll.vi expected, no vwsll.vi */
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
new file mode 100644 (file)
index 0000000..a9319a3
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15) /* shift 15 with zvbb: vsll.vi expected, vwsll.vi must not appear */
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
new file mode 100644 (file)
index 0000000..77fd95b
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "pr121959.h"
+
+#define WT int32_t /* element type of the result and expected arrays */
+#define NT uint8_t /* element type of the input arrays */
+#define IMM 16 /* shift amount passed to the test function */
+#define N 16 /* number of elements per array */
+
+DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) /* defines test_from_uint8_t_to_int32_t_16_0 */
+
+NT g_data[][2][N] = {
+  {
+    /* a: minuend; (a[k] - b[k]) << IMM must equal g_expect[0][k].  */
+    {
+        2,   2,   2,   2,
+      255, 255, 255, 255,
+      128, 128, 128, 128,
+      127, 127, 127, 127,
+    },
+    /* b: subtrahend.  */
+    {
+      1, 1, 1, 1,
+      0, 0, 0, 0,
+      2, 2, 2, 2,
+      7, 7, 7, 7,
+    },
+  },
+};
+
+WT g_expect[][N] = {
+  /* 0: values main () compares against the output for g_data[0].  */
+  {
+       65536,    65536,    65536,    65536,
+    16711680, 16711680, 16711680, 16711680,
+     8257536,  8257536,  8257536,  8257536,
+     7864320,  7864320,  7864320,  7864320,
+  },
+};
+
+int
+main ()
+{
+  WT out[N];
+  unsigned set_idx, lane;
+
+  /* Run every data set; abort on the first lane that mismatches.  */
+  for (set_idx = 0; set_idx < sizeof (g_data) / sizeof (g_data[0]); set_idx++)
+    {
+      NT *a = g_data[set_idx][0];
+      NT *b = g_data[set_idx][1];
+
+      RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N);
+
+      for (lane = 0; lane < N; lane++)
+       if (out[lane] != g_expect[set_idx][lane])
+         __builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
new file mode 100644 (file)
index 0000000..10b1b62
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef HAVE_DEFINED_PR121959_H
+#define HAVE_DEFINED_PR121959_H
+
+#include <stdint.h>
+
+#define DEF_VWSLL_FUNC_0(WT, NT, IMM) /* res[i] = (a[i] - b[i]) << IMM */ \
+void                                                    \
+test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res, \
+                                    NT * restrict a,   \
+                                    NT * restrict b,   \
+                                    int n)             \
+{                                                       \
+  for (int i = 0; i < n; i++)                           \
+    {                                                   \
+      res[i] = (a[i] - b[i]) << IMM;                    \
+    }                                                   \
+}
+#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM) /* expands macro arguments before ## pasting */
+#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) /* call the function defined by DEF_VWSLL_FUNC_0 */ \
+  test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n)
+#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) /* expands macro arguments before ## pasting */ \
+  RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n)
+
+#endif