From: Dennis Zhang Date: Fri, 11 Dec 2020 15:10:58 +0000 (+0000) Subject: aarch64: intrinsics to convert BFloat16 to Float32 X-Git-Tag: releases/gcc-10.3.0~520 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=702e45ee471422dee86d32fc84f617d341d33175;p=thirdparty%2Fgcc.git aarch64: intrinsics to convert BFloat16 to Float32 gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry. (vbfcvt_high, bfcvt): Likewise. * config/aarch64/aarch64-simd.md(aarch64_vbfcvt): New entry. (aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise. * config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic. * config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise. (vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c (test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests. (test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise. (cherry picked from commit f7d6961126a7f06c8089d8a58bd21be43bc16806) --- diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index ba2bda26dcdd..7192f3954d31 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -728,3 +728,8 @@ VAR1 (UNOP, bfcvtn_q, 0, v8bf) VAR1 (BINOP, bfcvtn2, 0, v8bf) VAR1 (UNOP, bfcvt, 0, bf) + + /* Implemented by aarch64_{v}bfcvt{_high}. */ + VAR2 (UNOP, vbfcvt, 0, v4bf, v8bf) + VAR1 (UNOP, vbfcvt_high, 0, v8bf) + VAR1 (UNOP, bfcvt, 0, sf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9f0e2bd1e6ff..2e8aa668b107 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7238,3 +7238,31 @@ "bfcvt\\t%h0, %s1" [(set_attr "type" "f_cvt")] ) + +;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes. +(define_insn "aarch64_vbfcvt" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "shll\\t%0.4s, %1.4h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_vbfcvt_highv8bf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "shll2\\t%0.4s, %1.8h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_bfcvtsf" + [(set (match_operand:SF 0 "register_operand" "=w") + (unspec:SF [(match_operand:BF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "shl\\t%d0, %d1, #16" + [(set_attr "type" "neon_shift_imm")] +) diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h index 984875dcc014..881615498d3d 100644 --- a/gcc/config/aarch64/arm_bf16.h +++ b/gcc/config/aarch64/arm_bf16.h @@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a) return __builtin_aarch64_bfcvtbf (__a); } +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtah_f32_bf16 (bfloat16_t __a) +{ + return __builtin_aarch64_bfcvtsf (__a); +} + #pragma GCC pop_options #endif diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 95bfa5ebba21..69cccd327864 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvt_f32_bf16 (bfloat16x4_t __a) +{ + return __builtin_aarch64_vbfcvtv4bf (__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_low_f32_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vbfcvtv8bf (__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_high_f32_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vbfcvt_highv8bf (__a); +} + __extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvt_bf16_f32 (float32x4_t __a) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c index bbea630b1820..47af7c494d9b 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c @@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a) { return vcvth_bf16_f32 (a); } + +/* +**test_vcvt_f32_bf16: +** shll v0.4s, v0.4h, #16 +** ret +*/ +float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a) +{ + return vcvt_f32_bf16 (a); +} + +/* +**test_vcvtq_low_f32_bf16: +** shll v0.4s, v0.4h, #16 +** ret +*/ +float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a) +{ + return vcvtq_low_f32_bf16 (a); +} + +/* +**test_vcvtq_high_f32_bf16: +** shll2 v0.4s, v0.8h, #16 +** ret +*/ +float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a) +{ + return vcvtq_high_f32_bf16 (a); +} + +/* +**test_vcvtah_f32_bf16: +** shl d0, d0, #16 +** ret +*/ +float32_t test_vcvtah_f32_bf16 (bfloat16_t a) +{ + return vcvtah_f32_bf16 (a); +}