}
[(set_attr "type" "vfncvtitof")])
+;; This operation can be performed in the loop vectorizer but unfortunately
+;; not applicable for now. We can remove this pattern after loop vectorizer
+;; is able to take care of INT64 to FP16 conversion.
+(define_insn_and_split "<float_cvt><mode><vnnconvert>2"
+ [(set (match_operand:<VNNCONVERT> 0 "register_operand")
+ (any_float:<VNNCONVERT>
+ (match_operand:VWWCONVERTI 1 "register_operand")))]
+ "TARGET_VECTOR && TARGET_ZVFH && can_create_pseudo_p () && !flag_trapping_math"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx single = gen_reg_rtx (<VNCONVERT>mode); /* Get vector SF mode. */
+
+ /* Step-1, INT64 => FP32. */
+ emit_insn (gen_<float_cvt><mode><vnconvert>2 (single, operands[1]));
+ /* Step-2, FP32 => FP16. */
+ emit_insn (gen_trunc<vnconvert><vnnconvert>2 (operands[0], single));
+
+ DONE;
+ }
+ [(set_attr "type" "vfncvtitof")]
+)
+
;; =========================================================================
;; == Unary arithmetic
;; =========================================================================
(V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
])
+(define_mode_iterator VWWCONVERTI [
+ (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (RVVM2DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+
+ (V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH")
+ (V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 64")
+ (V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
+ (V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 256")
+ (V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 512")
+ (V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024")
+ (V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048")
+ (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096")
+])
+
(define_mode_iterator VQEXTI [
RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
(V512DF "v512si")
])
+;; NN indicates narrow twice
+(define_mode_attr VNNCONVERT [
+ (RVVM8DI "RVVM2HF") (RVVM4DI "RVVM1HF") (RVVM2DI "RVVMF2HF")
+ (RVVM1DI "RVVMF4HF")
+
+ (V1DI "V1HF") (V2DI "V2HF") (V4DI "V4HF") (V8DI "V8HF") (V16DI "V16HF")
+ (V32DI "V32HF") (V64DI "V64HF") (V128DI "V128HF") (V256DI "V256HF")
+ (V512DI "V512HF")
+])
+
+;; nn indicates narrow twice
+(define_mode_attr vnnconvert [
+ (RVVM8DI "rvvm2hf") (RVVM4DI "rvvm1hf") (RVVM2DI "rvvmf2hf")
+ (RVVM1DI "rvvmf4hf")
+
+ (V1DI "v1hf") (V2DI "v2hf") (V4DI "v4hf") (V8DI "v8hf") (V16DI "v16hf")
+ (V32DI "v32hf") (V64DI "v64hf") (V128DI "v128hf") (V256DI "v256hf")
+ (V512DI "v512hf")
+])
+
(define_mode_attr VDEMOTE [
(RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI")
(V1DI "V1SI")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint.h>
+
+/*
+** test_int65_to_fp16:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma
+** vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+
+** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma
+** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+
+** ...
+*/
+void
+test_int65_to_fp16 (int64_t * __restrict a, _Float16 *b, unsigned n)
+{
+ for (unsigned i = 0; i < n; i++)
+ b[i] = (_Float16) (a[i]);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint.h>
+
+/*
+** test_uint65_to_fp16:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma
+** vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+
+** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma
+** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+
+** ...
+*/
+void
+test_uint65_to_fp16 (uint64_t * __restrict a, _Float16 *b, unsigned n)
+{
+ for (unsigned i = 0; i < n; i++)
+ b[i] = (_Float16) (a[i]);
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -ffast-math --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_CONVERT (fp16, int64_t, _Float16, 1)
+DEF_CONVERT (fp16, int64_t, _Float16, 2)
+DEF_CONVERT (fp16, int64_t, _Float16, 4)
+DEF_CONVERT (fp16, int64_t, _Float16, 8)
+DEF_CONVERT (fp16, int64_t, _Float16, 16)
+DEF_CONVERT (fp16, int64_t, _Float16, 32)
+DEF_CONVERT (fp16, int64_t, _Float16, 64)
+DEF_CONVERT (fp16, int64_t, _Float16, 128)
+DEF_CONVERT (fp16, int64_t, _Float16, 256)
+DEF_CONVERT (fp16, int64_t, _Float16, 512)
+DEF_CONVERT (fp16, int64_t, _Float16, 1024)
+DEF_CONVERT (fp16, int64_t, _Float16, 2048)
+
+DEF_CONVERT (fp16, uint64_t, _Float16, 1)
+DEF_CONVERT (fp16, uint64_t, _Float16, 2)
+DEF_CONVERT (fp16, uint64_t, _Float16, 4)
+DEF_CONVERT (fp16, uint64_t, _Float16, 8)
+DEF_CONVERT (fp16, uint64_t, _Float16, 16)
+DEF_CONVERT (fp16, uint64_t, _Float16, 32)
+DEF_CONVERT (fp16, uint64_t, _Float16, 64)
+DEF_CONVERT (fp16, uint64_t, _Float16, 128)
+DEF_CONVERT (fp16, uint64_t, _Float16, 256)
+DEF_CONVERT (fp16, uint64_t, _Float16, 512)
+DEF_CONVERT (fp16, uint64_t, _Float16, 1024)
+DEF_CONVERT (fp16, uint64_t, _Float16, 2048)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */
+/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 30 } } */