;; - vfmsac.vf
;; - vfnmacc.vf
;; - vfnmsac.vf
+;; - vfwmacc.vf
+;; - vfwmsac.vf
;; =============================================================================
;; vfmadd.vf, vfmsub.vf, vfmacc.vf, vfmsac.vf
}
[(set_attr "type" "vfmuladd")]
)
+
+;; vfwmacc.vf, vfwmsac.vf
+(define_insn_and_split "*vfwmacc_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (plus_minus:VWEXTF
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))
+ (match_operand:VWEXTF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_widen_mul_scalar (<CODE>, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; Intermediate pattern for vfwmacc.vf and vfwmsac.vf used by combine
+(define_insn_and_split "*extend_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 1 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx tmp = gen_reg_rtx (<VEL>mode);
+ emit_insn (gen_extend<vsubel><vel>2(tmp, operands[1]));
+
+ rtx ops[] = {operands[0], tmp};
+ riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
+ riscv_vector::UNARY_OP, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
(RVVM1x2DF "rvvm1df")
])
+(define_mode_attr vsubel [
+ (RVVM8HI "qi") (RVVM4HI "qi") (RVVM2HI "qi") (RVVM1HI "qi") (RVVMF2HI "qi") (RVVMF4HI "qi")
+
+ (RVVM8SI "hi") (RVVM4SI "hi") (RVVM2SI "hi") (RVVM1SI "hi") (RVVMF2SI "hi")
+
+ (RVVM8SF "hf") (RVVM4SF "hf") (RVVM2SF "hf") (RVVM1SF "hf") (RVVMF2SF "hf")
+
+ (RVVM8DI "si") (RVVM4DI "si") (RVVM2DI "si") (RVVM1DI "si")
+
+ (RVVM8DF "sf") (RVVM4DF "sf") (RVVM2DF "sf") (RVVM1DF "sf")
+
+ ;; VLS modes.
+ (V1HI "qi") (V2HI "qi") (V4HI "qi") (V8HI "qi") (V16HI "qi") (V32HI "qi") (V64HI "qi") (V128HI "qi") (V256HI "qi")
+ (V512HI "qi") (V1024HI "qi") (V2048HI "qi")
+ (V1SI "hi") (V2SI "hi") (V4SI "hi") (V8SI "hi") (V16SI "hi") (V32SI "hi") (V64SI "hi") (V128SI "hi") (V256SI "hi")
+ (V512SI "hi") (V1024SI "hi")
+ (V1DI "si") (V2DI "si") (V4DI "si") (V8DI "si") (V16DI "si") (V32DI "si") (V64DI "si") (V128DI "si") (V256DI "si") (V512DI "si")
+
+ (V1SF "hf")
+ (V2SF "hf")
+ (V4SF "hf")
+ (V8SF "hf")
+ (V16SF "hf")
+ (V32SF "hf")
+ (V64SF "hf")
+ (V128SF "hf")
+ (V256SF "hf")
+ (V512SF "hf")
+ (V1024SF "hf")
+ (V1DF "sf")
+ (V2DF "sf")
+ (V4DF "sf")
+ (V8DF "sf")
+ (V16DF "sf")
+ (V32DF "sf")
+ (V64DF "sf")
+ (V128DF "sf")
+ (V256DF "sf")
+ (V512DF "sf")
+])
+
(define_mode_attr VSUBEL [
(RVVM8HI "QI") (RVVM4HI "QI") (RVVM2HI "QI") (RVVM1HI "QI") (RVVMF2HI "QI") (RVVMF4HI "QI")
(plus_minus:VWEXTF
(mult:VWEXTF
(float_extend:VWEXTF
- (vec_duplicate:<V_DOUBLE_TRUNC>
- (match_operand:<VSUBEL> 3 "register_operand" " f")))
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr")))
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 3 "register_operand" " f"))))
(match_operand:VWEXTF 2 "register_operand" " 0"))
(match_dup 2)))]
"TARGET_VECTOR"
DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, +, sac)
DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, -, nacc)
DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, -, nsac)
+DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, +, +, acc)
+DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfnmacc.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */
DEF_VF_MULOP_ACC_CASE_0 (float, -, +, sac)
DEF_VF_MULOP_ACC_CASE_0 (float, +, -, nacc)
DEF_VF_MULOP_ACC_CASE_0 (float, -, -, nsac)
+DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, +, acc)
+DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfnmacc.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
/* { dg-final { scan-assembler-not {vfnmacc.vf} } } */
/* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 2 } } */
+/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
/* { dg-final { scan-assembler-not {vfnmacc.vf} } } */
/* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 2 } } */
+/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */
DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, +, sac, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, -, nacc, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, -, nsac, VF_MULOP_ACC_BODY_X128)
+DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, +, acc)
+DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
/* { dg-final { scan-assembler {vfmsac.vf} } } */
/* { dg-final { scan-assembler {vfnmacc.vf} } } */
/* { dg-final { scan-assembler {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler {vfwmsac.vf} } } */
DEF_VF_MULOP_ACC_CASE_1 (float, -, +, sac, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (float, +, -, nacc, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (float, -, -, nsac, VF_MULOP_ACC_BODY_X128)
+DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, +, acc)
+DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
/* { dg-final { scan-assembler {vfmsac.vf} } } */
/* { dg-final { scan-assembler {vfnmacc.vf} } } */
/* { dg-final { scan-assembler {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler {vfwmsac.vf} } } */
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
/* { dg-final { scan-assembler-not {vfnmacc.vf} } } */
/* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler {fcvt.s.h} } } */
/* { dg-final { scan-assembler-not {vfmsac.vf} } } */
/* { dg-final { scan-assembler-not {vfnmacc.vf} } } */
/* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler {fcvt.d.s} } } */
#define RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, out, in, x, n) \
RUN_VF_MULOP_ACC_CASE_0 (T, NAME, out, in, x, n)
+#define DEF_VF_MULOP_WIDEN_CASE_0(T1, T2, OP, NEG, NAME) \
+ void test_vf_mulop_widen_##NAME##_##T1##_case_0 (T2 *restrict out, \
+ T1 *restrict in, \
+ T1 *restrict f, unsigned n) \
+ { \
+ for (unsigned i = 0; i < n; i++) \
+ out[i] = NEG ((T2) * f * (T2) in[i] OP out[i]); \
+ }
+#define DEF_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NEG, NAME) \
+ DEF_VF_MULOP_WIDEN_CASE_0 (T1, T2, OP, NEG, NAME)
+#define RUN_VF_MULOP_WIDEN_CASE_0(T1, T2, NAME, out, in, x, n) \
+ test_vf_mulop_widen_##NAME##_##T1##_case_0 (out, in, x, n)
+#define RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, x, n) \
+ RUN_VF_MULOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, x, n)
+
#define VF_MULOP_BODY(op, neg) \
out[k + 0] = neg (tmp * out[k + 0] op in[k + 0]); \
out[k + 1] = neg (tmp * out[k + 1] op in[k + 1]); \
#define DEF_VF_MULOP_ACC_CASE_1_WRAP(T, OP, NEG, NAME, BODY) \
DEF_VF_MULOP_ACC_CASE_1 (T, OP, NEG, NAME, BODY)
+#define DEF_VF_MULOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NEG, NAME) \
+ void test_vf_mulop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 ( \
+ TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3, \
+ TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b, \
+ TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ { \
+ dst[i] = NEG ((TYPE2) * a * (TYPE2) b[i] OP dst[i]); \
+ dst2[i] = NEG ((TYPE2) * a2 * (TYPE2) b[i] OP dst2[i]); \
+ dst3[i] = NEG ((TYPE2) * a2 * (TYPE2) a[i] OP dst3[i]); \
+ dst4[i] = NEG ((TYPE2) * a * (TYPE2) b2[i] OP dst4[i]); \
+ } \
+ }
+
#endif
--- /dev/null
+#ifndef HAVE_DEFINED_VF_MULOP_WIDEN_RUN_H
+#define HAVE_DEFINED_VF_MULOP_WIDEN_RUN_H
+
+#include <assert.h>
+
+#define N 512
+
+int main ()
+{
+ T1 f[N];
+ T1 in[N];
+ T2 out[N];
+ T2 out2[N];
+
+ for (int i = 0; i < N; i++)
+ {
+ f[i] = LIMIT + i % 8723;
+ in[i] = LIMIT + i & 1964;
+ out[i] = LIMIT + i & 628;
+ out2[i] = LIMIT + i & 628;
+ asm volatile ("" ::: "memory");
+ }
+
+ TEST_RUN (T1, T2, NAME, out, in, f, N);
+
+ for (int i = 0; i < N; i++)
+ assert (out[i] == NEG(((T2) *f * (T2) in[i]) OP out2[i]));
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1 _Float16
+#define T2 float
+#define NAME acc
+#define OP +
+#define NEG +
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_mulop_widen_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1 float
+#define T2 double
+#define NAME acc
+#define OP +
+#define NEG +
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_mulop_widen_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1 _Float16
+#define T2 float
+#define NAME sac
+#define OP -
+#define NEG +
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_mulop_widen_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1 float
+#define T2 double
+#define NAME sac
+#define OP -
+#define NEG +
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_mulop_widen_run.h"