From: Andrea Corallo Date: Wed, 21 Oct 2020 09:16:01 +0000 (+0200) Subject: arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics X-Git-Tag: releases/gcc-10.3.0~526 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=00b3e8408ab6173bedb5a0ad10c8cec17b8e99af;p=thirdparty%2Fgcc.git arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics gcc/ChangeLog 2020-10-21 Andrea Corallo * config/arm/arm_neon_builtins.def: Add to LOAD1LANE v4bf, v8bf. * config/arm/arm_neon.h (vld1_lane_bf16, vld1q_lane_bf16): Add intrinsics. gcc/testsuite/ChangeLog 2020-10-21 Andrea Corallo * gcc.target/arm/simd/vld1_lane_bf16_1.c: New testcase. * gcc.target/arm/simd/vld1_lane_bf16_indices_1.c: Likewise. * gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c: Likewise. --- diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index aa21730dea0d..fcd8020425e7 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -19665,6 +19665,20 @@ vld4q_dup_bf16 (const bfloat16_t * __ptr) return __rv.__i; } +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_bf16 (const bfloat16_t * __a, bfloat16x4_t __b, const int __c) +{ + return __builtin_neon_vld1_lanev4bf (__a, __b, __c); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_bf16 (const bfloat16_t * __a, bfloat16x8_t __b, const int __c) +{ + return __builtin_neon_vld1_lanev8bf (__a, __b, __c); +} + #pragma GCC pop_options #ifdef __cplusplus diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 34c1945c0a11..d0617a4695dd 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -312,8 +312,8 @@ VAR1 (TERNOP, vtbx3, v8qi) VAR1 (TERNOP, vtbx4, v8qi) VAR12 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) -VAR10 (LOAD1LANE, vld1_lane, - v8qi, v4hi, v2si, v2sf, 
di, v16qi, v8hi, v4si, v4sf, v2di) +VAR12 (LOAD1LANE, vld1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR12 (STORE1, vst1, diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c new file mode 100644 index 000000000000..94fb38f32b8f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c @@ -0,0 +1,22 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-O3 --save-temps -mfloat-abi=hard" } */ + +#include "arm_neon.h" + +bfloat16x4_t +test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b) +{ + return vld1_lane_bf16 (a, b, 1); +} + +bfloat16x8_t +test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b) +{ + return vld1q_lane_bf16 (a, b, 2); +} + +/* { dg-final { scan-assembler "vld1.16\t{d0\\\[1\\\]}, \\\[r0\\\]" } } */ +/* { dg-final { scan-assembler "vld1.16\t{d0\\\[2\\\]}, \\\[r0\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c new file mode 100644 index 000000000000..d9af512cf92c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-mfloat-abi=hard" } */ + +#include "arm_neon.h" + +bfloat16x4_t +test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b) +{ + bfloat16x4_t res; + res = vld1_lane_bf16 (a, b, -1); + res = vld1_lane_bf16 (a, b, 4); + return res; +} + +/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ +/* { dg-error "lane 4 out of range 
0 - 3" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c new file mode 100644 index 000000000000..a73184c0f780 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-mfloat-abi=hard" } */ + +#include "arm_neon.h" + +bfloat16x8_t +test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b) +{ + bfloat16x8_t res; + res = vld1q_lane_bf16 (a, b, -1); + res = vld1q_lane_bf16 (a, b, 8); + return res; +} + +/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ +/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */