}
[(set_attr "type" "vfwmuladd")
(set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; -------------------------------------------------------------------------
-;; ---- [FP] VFWNMSAC
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - vfwnmsac.vv
-;; -------------------------------------------------------------------------
-
-;; Combine ext + ext + fnma ===> widen fnma.
-;; In most circumstances, LoopVectorizer will generate the following IR:
-;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41;
-;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36;
-;; vect__13.182_33 = .FNMA (vect__11.180_35, vect__8.176_40, vect__4.172_45);
-;; Matches an FMA in the wide mode VWEXTF whose two multiplicands are
-;; float_extends of <V_DOUBLE_TRUNC> registers (operands 2 and 3), with the
-;; extended operand 2 negated, and whose addend is the wide operand 1.
-;; The insn condition requires TARGET_VECTOR and can_create_pseudo_p ().
-;; The output template is "#", so the insn is not output as-is; it is split,
-;; and the split emits one predicated widening ternary insn selected by
-;; code_for_pred_widen_mul_neg (PLUS, <MODE>mode).
-(define_insn_and_split "*double_widen_fnma<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (neg:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
- (match_operand:VWEXTF 1 "register_operand")))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* The operands array is passed through unchanged.  */
- riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul_neg (PLUS, <MODE>mode),
- riscv_vector::RVV_WIDEN_TERNOP, operands);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; This helps to match ext + fnma.
-;; Here only operand 2 is a float_extend of a <V_DOUBLE_TRUNC> register and
-;; is negated; operand 3 and the addend (operand 1) are already VWEXTF
-;; registers.  The "#" output template means the insn is split rather than
-;; output as-is; the split emits an explicit extend followed by an fnma
-;; expansion.
-(define_insn_and_split "*single_widen_fnma<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (neg:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (match_operand:VWEXTF 3 "register_operand")
- (match_operand:VWEXTF 1 "register_operand")))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* Extend operand 2 into a fresh <MODE>mode pseudo.  */
- insn_code icode = code_for_pred_extend (<MODE>mode);
- rtx tmp = gen_reg_rtx (<MODE>mode);
- rtx ext_ops[] = {tmp, operands[2]};
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);
-
- /* Expand fnma_optab on (tmp, operands[3], operands[1]), passing
- operands[0] as the target argument, then copy the returned rtx into
- operands[0].  */
- rtx dst = expand_ternary_op (<MODE>mode, fnma_optab, tmp, operands[3],
- operands[1], operands[0], 0);
- emit_move_insn (operands[0], dst);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; -------------------------------------------------------------------------
-;; ---- [FP] VFWMSAC
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - vfwmsac.vv
-;; -------------------------------------------------------------------------
-
-;; Combine ext + ext + fms ===> widen fms.
-;; In most circumstances, LoopVectorizer will generate the following IR:
-;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41;
-;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36;
-;; vect__13.182_33 = .FMS (vect__11.180_35, vect__8.176_40, vect__4.172_45);
-;; Matches an FMA in the wide mode VWEXTF whose two multiplicands are
-;; float_extends of <V_DOUBLE_TRUNC> registers (operands 2 and 3) and whose
-;; addend is the negated wide operand 1.
-;; The insn condition requires TARGET_VECTOR and can_create_pseudo_p ().
-;; The output template is "#", so the insn is not output as-is; it is split,
-;; and the split emits one predicated widening ternary insn selected by
-;; code_for_pred_widen_mul (MINUS, <MODE>mode).
-(define_insn_and_split "*double_widen_fms<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
- (neg:VWEXTF
- (match_operand:VWEXTF 1 "register_operand"))))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* The operands array is passed through unchanged.  */
- riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul (MINUS, <MODE>mode),
- riscv_vector::RVV_WIDEN_TERNOP, operands);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; This helps to match ext + fms.
-;; Here only operand 2 is a float_extend of a <V_DOUBLE_TRUNC> register;
-;; operand 3 is already a VWEXTF register and the addend is the negated
-;; VWEXTF operand 1.  The "#" output template means the insn is split rather
-;; than output as-is; the split emits an explicit extend followed by an fms
-;; expansion.
-(define_insn_and_split "*single_widen_fms<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
- (match_operand:VWEXTF 3 "register_operand")
- (neg:VWEXTF
- (match_operand:VWEXTF 1 "register_operand"))))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* Extend operand 2 into a fresh <MODE>mode pseudo.  */
- insn_code icode = code_for_pred_extend (<MODE>mode);
- rtx tmp = gen_reg_rtx (<MODE>mode);
- rtx ext_ops[] = {tmp, operands[2]};
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);
-
- /* Expand fms_optab on (tmp, operands[3], operands[1]), passing
- operands[0] as the target argument, then copy the returned rtx into
- operands[0].  */
- rtx dst = expand_ternary_op (<MODE>mode, fms_optab, tmp, operands[3],
- operands[1], operands[0], 0);
- emit_move_insn (operands[0], dst);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; -------------------------------------------------------------------------
-;; ---- [FP] VFWNMACC
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - vfwnmacc.vv
-;; -------------------------------------------------------------------------
-
-;; Combine ext + ext + fnms ===> widen fnms.
-;; In most circumstances, LoopVectorizer will generate the following IR:
-;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41;
-;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36;
-;; vect__13.182_33 = .FNMS (vect__11.180_35, vect__8.176_40, vect__4.172_45);
-;; Matches an FMA in the wide mode VWEXTF whose two multiplicands are
-;; float_extends of <V_DOUBLE_TRUNC> registers (operands 2 and 3), with the
-;; extended operand 2 negated, and whose addend is the negated wide
-;; operand 1.
-;; The insn condition requires TARGET_VECTOR and can_create_pseudo_p ().
-;; The output template is "#", so the insn is not output as-is; it is split,
-;; and the split emits one predicated widening ternary insn selected by
-;; code_for_pred_widen_mul_neg (MINUS, <MODE>mode).
-(define_insn_and_split "*double_widen_fnms<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (neg:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
- (neg:VWEXTF
- (match_operand:VWEXTF 1 "register_operand"))))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* The operands array is passed through unchanged.  */
- riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul_neg (MINUS, <MODE>mode),
- riscv_vector::RVV_WIDEN_TERNOP, operands);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
-
-;; This helps to match ext + fnms.
-;; Here only operand 2 is a float_extend of a <V_DOUBLE_TRUNC> register and
-;; is negated; operand 3 is already a VWEXTF register and the addend is the
-;; negated VWEXTF operand 1.  The "#" output template means the insn is split
-;; rather than output as-is; the split emits an explicit extend followed by
-;; an fnms expansion.
-(define_insn_and_split "*single_widen_fnms<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand")
- (fma:VWEXTF
- (neg:VWEXTF
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (match_operand:VWEXTF 3 "register_operand")
- (neg:VWEXTF
- (match_operand:VWEXTF 1 "register_operand"))))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
- {
- /* Extend operand 2 into a fresh <MODE>mode pseudo.  */
- insn_code icode = code_for_pred_extend (<MODE>mode);
- rtx tmp = gen_reg_rtx (<MODE>mode);
- rtx ext_ops[] = {tmp, operands[2]};
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);
-
- /* Expand fnms_optab on (tmp, operands[3], operands[1]), passing
- operands[0] as the target argument, then copy the returned rtx into
- operands[0].  */
- rtx dst = expand_ternary_op (<MODE>mode, fnms_optab, tmp, operands[3],
- operands[1], operands[0], 0);
- emit_move_insn (operands[0], dst);
- DONE;
- }
- [(set_attr "type" "vfwmuladd")
- (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -O3 -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function whose loop adds the negated product of
-   a[i] and b[i], each converted from TYPE2 to TYPE1, to dst[i].
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwmacc_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwmacc_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
- TYPE2 *__restrict a, \
- TYPE2 *__restrict b, \
- int n) \
- { \
- for (int i = 0; i < n; i++) \
- dst[i] += -((TYPE1) a[i] * (TYPE1) b[i]); \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect two vfwnmsac.vv in the output; TEST_ALL has two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwnmsac\.vv} 2 } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -O3 -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function whose loop stores into dst[i] the
-   product of a[i] and b[i], each converted from TYPE2 to TYPE1, minus the
-   previous dst[i].
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwmacc_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwmacc_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
- TYPE2 *__restrict a, \
- TYPE2 *__restrict b, \
- int n) \
- { \
- for (int i = 0; i < n; i++) \
- dst[i] = (TYPE1) a[i] * (TYPE1) b[i] - dst[i]; \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect two vfwmsac.vv in the output; TEST_ALL has two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwmsac\.vv} 2 } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -O3 -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function whose loop stores into dst[i] the
-   negated product of a[i] and b[i], each converted from TYPE2 to TYPE1,
-   minus the previous dst[i].
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwmacc_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwmacc_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
- TYPE2 *__restrict a, \
- TYPE2 *__restrict b, \
- int n) \
- { \
- for (int i = 0; i < n; i++) \
- dst[i] = -((TYPE1) a[i] * (TYPE1) b[i]) - dst[i]; \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect two vfwnmacc.vv in the output; TEST_ALL has two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwnmacc\.vv} 2 } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function that updates four destinations per
-   iteration.  Each adds the negated product of one pairing of the narrow
-   inputs, converted from TYPE2 to TYPE1: (a, b), (a2, b), (a2, a), (a, b2).
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwadd_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \
- TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \
- TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \
- TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \
- { \
- for (int i = 0; i < n; i++) \
- { \
- dst[i] += -((TYPE1) a[i] * (TYPE1) b[i]); \
- dst2[i] += -((TYPE1) a2[i] * (TYPE1) b[i]); \
- dst3[i] += -((TYPE1) a2[i] * (TYPE1) a[i]); \
- dst4[i] += -((TYPE1) a[i] * (TYPE1) b2[i]); \
- } \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect eight vfwnmsac.vv: four statements in each of two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwnmsac\.vv} 8 } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function that updates four destinations per
-   iteration.  Each stores the product of one pairing of the narrow inputs,
-   converted from TYPE2 to TYPE1, minus the destination's previous value:
-   (a, b), (a2, b), (a2, a), (a, b2).
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwadd_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \
- TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \
- TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \
- TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \
- { \
- for (int i = 0; i < n; i++) \
- { \
- dst[i] = (TYPE1) a[i] * (TYPE1) b[i] - dst[i]; \
- dst2[i] = (TYPE1) a2[i] * (TYPE1) b[i] - dst2[i]; \
- dst3[i] = (TYPE1) a2[i] * (TYPE1) a[i] - dst3[i]; \
- dst4[i] = (TYPE1) a[i] * (TYPE1) b2[i] - dst4[i]; \
- } \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect eight vfwmsac.vv: four statements in each of two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwmsac\.vv} 8 } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <stdint-gcc.h>
-
-/* TEST_TYPE defines a noipa function that updates four destinations per
-   iteration.  Each stores the negated product of one pairing of the narrow
-   inputs, converted from TYPE2 to TYPE1, minus the destination's previous
-   value: (a, b), (a2, b), (a2, a), (a, b2).
-   TYPE1_ is a single identifier token, not the TYPE1 parameter followed by
-   '_', so the pasted function name is vwadd_TYPE1_<TYPE2> and does not
-   contain TYPE1.  */
-#define TEST_TYPE(TYPE1, TYPE2) \
- __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \
- TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \
- TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \
- TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \
- { \
- for (int i = 0; i < n; i++) \
- { \
- dst[i] = -((TYPE1) a[i] * (TYPE1) b[i]) - dst[i]; \
- dst2[i] = -((TYPE1) a2[i] * (TYPE1) b[i]) - dst2[i]; \
- dst3[i] = -((TYPE1) a2[i] * (TYPE1) a[i]) - dst3[i]; \
- dst4[i] = -((TYPE1) a[i] * (TYPE1) b2[i]) - dst4[i]; \
- } \
- }
-
-/* Two instantiations: float from _Float16, and double from float.  */
-#define TEST_ALL() \
- TEST_TYPE (float, _Float16) \
- TEST_TYPE (double, float)
-
-TEST_ALL ()
-
-/* Expect eight vfwnmacc.vv: four statements in each of two instantiations.  */
-/* { dg-final { scan-assembler-times {\tvfwnmacc\.vv} 8 } } */
+++ /dev/null
-/* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-10.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   -(a * b) + dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == -((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) + dst2##TYPE1[i]);
-
-/* Only the double-from-float variant is exercised here.  */
-#define RUN_ALL() RUN (double, float, -2147483648)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}
+++ /dev/null
-/* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-11.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   (a * b) - dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) - dst2##TYPE1[i]);
-
-/* Only the double-from-float variant is exercised here.  */
-#define RUN_ALL() RUN (double, float, -2147483648)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}
+++ /dev/null
-/* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-12.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   -(a * b) - dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == -((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) - dst2##TYPE1[i]);
-
-/* Only the double-from-float variant is exercised here.  */
-#define RUN_ALL() RUN (double, float, -2147483648)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}
+++ /dev/null
-/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-10.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   -(a * b) + dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == -((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) + dst2##TYPE1[i]);
-
-/* Only the float-from-_Float16 variant is exercised here.  */
-#define RUN_ALL() RUN (float, _Float16, -32768)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}
+++ /dev/null
-/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-11.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   (a * b) - dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) - dst2##TYPE1[i]);
-
-/* Only the float-from-_Float16 variant is exercised here.  */
-#define RUN_ALL() RUN (float, _Float16, -32768)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}
+++ /dev/null
-/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
-
-#include <assert.h>
-#include "widen-12.c"
-
-#define SZ 512
-
-/* RUN declares the input arrays a<TYPE2> and b<TYPE2> and the accumulators
-   dst<TYPE1> and dst2<TYPE1> (names formed by token pasting), fills them,
-   calls the kernel on dst<TYPE1>, and asserts that every element equals
-   -(a * b) - dst2.  dst2<TYPE1> is initialized identically to dst<TYPE1> and
-   is not passed to the kernel, so it still holds the original values.
-   Precedence: "LIMIT + i % 8723" is LIMIT + (i % 8723), whereas
-   "LIMIT + i & 1964" and "LIMIT + i & 628" are (LIMIT + i) & 1964 and
-   (LIMIT + i) & 628, because + binds tighter than &.
-   TYPE1_ in the call is one identifier token, so the callee name is
-   vwmacc_TYPE1_<TYPE2>; presumably this matches the definition pulled in
-   by the include above -- confirm against that file.  */
-#define RUN(TYPE1, TYPE2, LIMIT) \
- TYPE2 a##TYPE2[SZ]; \
- TYPE2 b##TYPE2[SZ]; \
- TYPE1 dst##TYPE1[SZ]; \
- TYPE1 dst2##TYPE1[SZ]; \
- for (int i = 0; i < SZ; i++) \
- { \
- a##TYPE2[i] = LIMIT + i % 8723; \
- b##TYPE2[i] = LIMIT + i & 1964; \
- dst##TYPE1[i] = LIMIT + i & 628; \
- dst2##TYPE1[i] = LIMIT + i & 628; \
- } \
- vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
- for (int i = 0; i < SZ; i++) \
- assert (dst##TYPE1[i] \
- == -((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) - dst2##TYPE1[i]);
-
-/* Only the float-from-_Float16 variant is exercised here.  */
-#define RUN_ALL() RUN (float, _Float16, -32768)
-
-int
-main ()
-{
- /* RUN expands to declarations and statements, so the arrays are locals.  */
- RUN_ALL ()
-}