"vmv.x.s\t%0,%1"
[(set_attr "type" "vimovvx")
(set_attr "mode" "<MODE>")])
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] VFWMACC
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfwmacc.vv
+;; -------------------------------------------------------------------------
+
+;; Combine ext + ext + fma ===> widen fma.
+;; In most circumstances, LoopVectorizer will generate the following IR:
+;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41;
+;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36;
+;; vect__13.182_33 = .FMA (vect__11.180_35, vect__8.176_40, vect__4.172_45);
+;; Matches an fma in a VWEXTF mode whose two multiplicands (operands 2 and 3)
+;; are both float_extend of <V_DOUBLE_TRUNC> registers and whose addend
+;; (operand 1) is already a VWEXTF register.  The output template is "#", so
+;; the insn is only ever split: the split body hands all four operands to
+;; the pattern selected by code_for_pred_widen_mul (PLUS, <MODE>mode).
+(define_insn_and_split "*double_widen_fma<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (fma:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+ (match_operand:VWEXTF 1 "register_operand")))]
+ ;; Only recognized while new pseudo registers may still be created.
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ ;; No extra split condition beyond the insn condition above.
+ "&& 1"
+ [(const_int 0)]
+ {
+ /* Emit the widening multiply-add as a single VLMAX ternary insn; operands
+ are passed through unchanged (0 = dest, 1 = addend, 2/3 = narrow
+ multiplicands).  */
+ riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul (PLUS, <MODE>mode),
+ riscv_vector::RVV_WIDEN_TERNOP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+;; This helps to match ext + fma.
+;; Matches an fma in a VWEXTF mode where only the first multiplicand
+;; (operand 2) is a float_extend of a <V_DOUBLE_TRUNC> register; the second
+;; multiplicand (operand 3) and the addend (operand 1) are already VWEXTF.
+;; The output template is "#", so the insn is only ever split: the split body
+;; materializes the extension into a new pseudo and then expands an fma in
+;; <MODE>mode on the extended value.
+(define_insn_and_split "*single_widen_fma<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (fma:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+ (match_operand:VWEXTF 3 "register_operand")
+ (match_operand:VWEXTF 1 "register_operand")))]
+ ;; The split body calls gen_reg_rtx, so pseudos must still be creatable.
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ ;; No extra split condition beyond the insn condition above.
+ "&& 1"
+ [(const_int 0)]
+ {
+ /* Step 1: extend the narrow operand 2 into a fresh <MODE>mode pseudo.  */
+ insn_code icode = code_for_pred_extend (<MODE>mode);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ rtx ext_ops[] = {tmp, operands[2]};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);
+
+ /* Step 2: expand tmp * operands[3] + operands[1] through fma_optab, with
+ operands[0] suggested as the target, then copy whatever register the
+ expander returned into operands[0].  */
+ rtx dst = expand_ternary_op (<MODE>mode, fma_optab, tmp, operands[3],
+ operands[1], operands[0], 0);
+ emit_move_insn (operands[0], dst);
+ DONE;
+ }
+ ;; NOTE(review): the type is vfwmuladd although the split above emits a
+ ;; separate extend followed by an fma in <MODE>mode -- confirm this is
+ ;; intended.
+ [(set_attr "type" "vfwmuladd")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
#include <stdint-gcc.h>
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
- TEST_TYPE (uint64_t, uint32_t)
+ TEST_TYPE (uint64_t, uint32_t) \
+ TEST_TYPE (float, _Float16) \
+ TEST_TYPE (double, float)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwmacc\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 2 } } */
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
#include <stdint-gcc.h>
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
- TEST_TYPE (uint64_t, uint32_t)
+ TEST_TYPE (uint64_t, uint32_t) \
+ TEST_TYPE (float, _Float16) \
+ TEST_TYPE (double, float)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwmacc\.vv} 12 } } */
/* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 8 } } */
/* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
#include <assert.h>
#include "widen-8.c"
RUN (int32_t, int16_t, -32768) \
RUN (uint32_t, uint16_t, 65535) \
RUN (int64_t, int32_t, -2147483648) \
- RUN (uint64_t, uint32_t, 4294967295)
+ RUN (uint64_t, uint32_t, 4294967295) \
+ RUN (double, float, -2147483648)
int
main ()
--- /dev/null
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+
+#include <assert.h>
+#include "widen-8.c"
+
+#define SZ 512
+
+/* RUN (TYPE1, TYPE2, LIMIT): declare two TYPE2 input arrays and two TYPE1
+   accumulator arrays of SZ elements, fill them from LIMIT and the loop
+   index, call the widening multiply-accumulate routine on dst/a/b, and
+   assert that each dst element equals (TYPE1) a * (TYPE1) b plus the
+   matching dst2 element.  dst2 receives the same initial values as dst and
+   is not passed to the routine, so it holds the pre-call accumulator.
+
+   In the initializers, `%' binds tighter than `+', so a[i] is
+   LIMIT + (i % 8723); `&' binds looser than `+', so the other three are
+   (LIMIT + i) & N.
+   NOTE(review): confirm (LIMIT + i) & N rather than LIMIT + (i & N) is the
+   intended pattern.
+
+   NOTE(review): in `vwmacc_##TYPE1_##TYPE2' the token `TYPE1_' is lexed as a
+   single identifier, so TYPE1 is not substituted into the callee name; this
+   presumably relies on widen-8.c forming the function name with the same
+   spelling -- confirm.  */
+#define RUN(TYPE1, TYPE2, LIMIT) \
+ TYPE2 a##TYPE2[SZ]; \
+ TYPE2 b##TYPE2[SZ]; \
+ TYPE1 dst##TYPE1[SZ]; \
+ TYPE1 dst2##TYPE1[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE2[i] = LIMIT + i % 8723; \
+ b##TYPE2[i] = LIMIT + i & 1964; \
+ dst##TYPE1[i] = LIMIT + i & 628; \
+ dst2##TYPE1[i] = LIMIT + i & 628; \
+ } \
+ vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (dst##TYPE1[i] \
+ == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) + dst2##TYPE1[i]);
+
+/* Only the float <- _Float16 combination is exercised in this file.  */
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+/* Test entry point: expands RUN_ALL, whose asserts perform the checking.  */
+int
+main ()
+{
+ RUN_ALL ()
+}