}
[(set_attr "type" "vfalu")]
)
+
+;; Combine vsext.vf2 + vsll.vi into a single vwsll.vi when ZVBB is available.
+;; vwsll.vi zero-extends its narrow source, whereas this pattern matches a
+;; sign extension.  The two only give the same result when the shift amount
+;; is equal to or greater than the precision of the double-truncated
+;; (narrow) element, because then every sign-extended bit is shifted out.
+;; For smaller shift amounts the split emits a separate sign extend followed
+;; by a plain shift.
+;; Appears in the satd function of x264.
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+     (ashift:VWEXTI
+       (sign_extend:VWEXTI
+         (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+       (match_operand 2 "const_int_operand")))]
+  "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    /* INTVAL yields a HOST_WIDE_INT; keep the full width for the compare
+       instead of narrowing it to int.  */
+    HOST_WIDE_INT imm = INTVAL (operands[2]);
+    int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+    if (imm >= trunc_prec)
+      {
+        /* All sign-extended bits are shifted out, so the zero-extending
+           widening shift computes the same value.  */
+        insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+        riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                       operands);
+      }
+    else
+      {
+        /* Shift amount is below the narrow precision: sign extend into a
+           fresh pseudo first, then shift that by the immediate.  */
+        insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+        rtx extend = gen_reg_rtx (<MODE>mode);
+        rtx unary_ops[] = {extend, operands[1]};
+        riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+                                       unary_ops);
+
+        icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+        rtx binary_ops[] = {operands[0], extend, operands[2]};
+        riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                       binary_ops);
+      }
+
+    DONE;
+  }
+)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+/* Zvbb enabled, shift amount 16: the shift is expected to be emitted as
+   exactly one vwsll.vi, with no separate vsll.vi.  */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+/* Zvbb not enabled (plain rv64gcv), shift amount 16: vwsll.vi must not
+   appear; exactly one vsll.vi is expected instead.  */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+/* Zvbb enabled, shift amount 17: the shift is expected to be emitted as
+   exactly one vwsll.vi, with no separate vsll.vi.  */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+/* Zvbb not enabled (plain rv64gcv), shift amount 17: vwsll.vi must not
+   appear; exactly one vsll.vi is expected instead.  */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+/* Zvbb enabled but shift amount 15: even with Zvbb available the test
+   expects no vwsll.vi and exactly one vsll.vi.  Presumably 15 is below
+   the narrow element precision, so the zero-extending vwsll.vi would not
+   preserve the sign extension -- confirm against the combine pattern.  */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "pr121959.h"
+
+/* Test configuration: WT is the element type of the result array, NT the
+   element type of both input arrays, IMM the left-shift amount and N the
+   number of elements in each array.  */
+#define WT int32_t
+#define NT uint8_t
+#define IMM 16
+#define N 16
+
+/* Instantiate the function under test.  The _WRAP form is used so that
+   WT, NT and IMM are macro-expanded before the function name is built.  */
+DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM)
+
+/* Input operands for each test case: g_data[case][0] is operand a and
+   g_data[case][1] is operand b, N elements each.  Every group of four
+   lanes uses one (a, b) pair, and (a - b) << IMM for that pair is the
+   value repeated four times in the matching row of g_expect.  */
+NT g_data[][2][N] = {
+  {
+    /* a */
+    {
+      /* All four lanes must be 2: 2 - 1 = 1, and 1 << 16 = 65536 is what
+         g_expect holds for each of these lanes.  */
+      2, 2, 2, 2,
+      255, 255, 255, 255,
+      128, 128, 128, 128,
+      127, 127, 127, 127,
+    },
+    /* b */
+    {
+      1, 1, 1, 1,
+      0, 0, 0, 0,
+      2, 2, 2, 2,
+      7, 7, 7, 7,
+    },
+  },
+};
+
+/* Expected results, one row of N elements per test case and indexed in
+   the same order as g_data.  main compares the computed output against
+   the matching row element by element and aborts on any difference.  */
+WT g_expect[][N] = {
+ /* 0 */
+ {
+ 65536, 65536, 65536, 65536,
+ 16711680, 16711680, 16711680, 16711680,
+ 8257536, 8257536, 8257536, 8257536,
+ 7864320, 7864320, 7864320, 7864320,
+ },
+};
+
+/* Run every case in g_data through the generated function and abort on
+   the first output element that differs from g_expect.  Returns 0 when
+   all cases match.  */
+int
+main ()
+{
+  const unsigned n_cases = sizeof (g_data) / sizeof (g_data[0]);
+  WT result[N];
+
+  for (unsigned c = 0; c < n_cases; c++)
+    {
+      NT *lhs = g_data[c][0];
+      NT *rhs = g_data[c][1];
+      WT *want = g_expect[c];
+
+      RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, result, lhs, rhs, N);
+
+      for (unsigned e = 0; e < N; e++)
+        {
+          if (result[e] != want[e])
+            __builtin_abort ();
+        }
+    }
+
+  return 0;
+}
--- /dev/null
+#ifndef HAVE_DEFINED_PR121959_H
+#define HAVE_DEFINED_PR121959_H
+
+#include <stdint.h>
+
+/* Define the function test_from_<NT>_to_<WT>_<IMM>_0, which stores
+   (a[k] - b[k]) << IMM into res[k] for every k in [0, n).  WT is the
+   element type of res; NT is the element type of a and b.  */
+#define DEF_VWSLL_FUNC_0(WT, NT, IMM)                       \
+void                                                        \
+test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res,     \
+                                     NT * restrict a,       \
+                                     NT * restrict b,       \
+                                     int n)                 \
+{                                                           \
+  for (int idx = 0; idx < n; idx++)                         \
+    res[idx] = (a[idx] - b[idx]) << IMM;                    \
+}
+/* The _WRAP variants add one level of macro expansion so that arguments
+   which are themselves macros are expanded before being token-pasted
+   into the generated function name.  */
+#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM)
+/* Call the function that DEF_VWSLL_FUNC_0 generates for the same
+   WT, NT and IMM, forwarding res, a, b and n unchanged.  */
+#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \
+ test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n)
+#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) \
+ RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n)
+
+#endif