]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add Armv8.6 SVE bfloat16 support
authorRichard Sandiford <richard.sandiford@arm.com>
Thu, 30 Jan 2020 15:46:28 +0000 (15:46 +0000)
committerRichard Sandiford <richard.sandiford@arm.com>
Fri, 31 Jan 2020 17:40:40 +0000 (17:40 +0000)
This patch adds support for the SVE intrinsics that map to Armv8.6
bfloat16 instructions.  This means that svcvtnt is now a base SVE
function for one type suffix combination; the others are still
SVE2-specific.

This relies on a binutils fix:

    https://sourceware.org/ml/binutils/2020-01/msg00450.html

so anyone testing older binutils 2.34 or binutils master sources will
need to upgrade to get clean test results.  (At the time of writing,
no released version of binutils has this bug.)

2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64.h (TARGET_SVE_BF16): New macro.
* config/aarch64/aarch64-sve-builtins-sve2.h (svcvtnt): Move to
aarch64-sve-builtins-base.h.
* config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtnt): Move to
aarch64-sve-builtins-base.cc.
* config/aarch64/aarch64-sve-builtins-base.h (svbfdot, svbfdot_lane)
(svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
(svcvtnt): Declare.
* config/aarch64/aarch64-sve-builtins-base.cc (svbfdot, svbfdot_lane)
(svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
(svcvtnt): New functions.
* config/aarch64/aarch64-sve-builtins-base.def (svbfdot, svbfdot_lane)
(svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
(svcvtnt): New functions.
(svcvt): Add a form that converts f32 to bf16.
* config/aarch64/aarch64-sve-builtins-shapes.h (ternary_bfloat)
(ternary_bfloat_lane, ternary_bfloat_lanex2, ternary_bfloat_opt_n):
Declare.
* config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type):
Treat B as bfloat16_t.
(ternary_bfloat_lane_base): New class.
(ternary_bfloat_def): Likewise.
(ternary_bfloat): New shape.
(ternary_bfloat_lane_def): New class.
(ternary_bfloat_lane): New shape.
(ternary_bfloat_lanex2_def): New class.
(ternary_bfloat_lanex2): New shape.
(ternary_bfloat_opt_n_def): New class.
(ternary_bfloat_opt_n): New shape.
* config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_bfloat): New macro.
* config/aarch64/aarch64-sve.md (@aarch64_sve_<sve_fp_op>vnx4sf)
(@aarch64_sve_<sve_fp_op>_lanevnx4sf): New patterns.
(@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>)
(@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
(*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
(@aarch64_sve_cvtnt<VNx8BF_ONLY:mode>): Likewise.
* config/aarch64/aarch64-sve2.md (@aarch64_sve2_cvtnt<mode>): Key
the pattern off the narrow mode instead of the wider one.
* config/aarch64/iterators.md (VNx8BF_ONLY): New mode iterator.
(UNSPEC_BFMLALB, UNSPEC_BFMLALT, UNSPEC_BFMMLA): New unspecs.
(sve_fp_op): Handle them.
(SVE_BFLOAT_TERNARY_LONG): New int iterator.
(SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_aarch64_asm_bf16_ok):
New proc.
* gcc.target/aarch64/sve/acle/asm/bfdot_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cvt_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c: Likewise.
* gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c:
Likewise.
* gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c:
Likewise.
* gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c:
Likewise.

28 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve-builtins-base.cc
gcc/config/aarch64/aarch64-sve-builtins-base.def
gcc/config/aarch64/aarch64-sve-builtins-base.h
gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
gcc/config/aarch64/aarch64-sve-builtins-shapes.h
gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
gcc/config/aarch64/aarch64-sve-builtins-sve2.h
gcc/config/aarch64/aarch64-sve-builtins.cc
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/aarch64.h
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c [new file with mode: 0644]
gcc/testsuite/lib/target-supports.exp

index d10ae9294cc411178cce7c299b4dbef34b9d01a5..234e32844ff952938698928d858b275f1682df42 100644 (file)
@@ -1,3 +1,49 @@
+2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64.h (TARGET_SVE_BF16): New macro.
+       * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtnt): Move to
+       aarch64-sve-builtins-base.h.
+       * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtnt): Move to
+       aarch64-sve-builtins-base.cc.
+       * config/aarch64/aarch64-sve-builtins-base.h (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): Declare.
+       * config/aarch64/aarch64-sve-builtins-base.cc (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): New functions.
+       * config/aarch64/aarch64-sve-builtins-base.def (svbfdot, svbfdot_lane)
+       (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+       (svcvtnt): New functions.
+       (svcvt): Add a form that converts f32 to bf16.
+       * config/aarch64/aarch64-sve-builtins-shapes.h (ternary_bfloat)
+       (ternary_bfloat_lane, ternary_bfloat_lanex2, ternary_bfloat_opt_n):
+       Declare.
+       * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type):
+       Treat B as bfloat16_t.
+       (ternary_bfloat_lane_base): New class.
+       (ternary_bfloat_def): Likewise.
+       (ternary_bfloat): New shape.
+       (ternary_bfloat_lane_def): New class.
+       (ternary_bfloat_lane): New shape.
+       (ternary_bfloat_lanex2_def): New class.
+       (ternary_bfloat_lanex2): New shape.
+       (ternary_bfloat_opt_n_def): New class.
+       (ternary_bfloat_opt_n): New shape.
+       * config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_bfloat): New macro.
+       * config/aarch64/aarch64-sve.md (@aarch64_sve_<sve_fp_op>vnx4sf)
+       (@aarch64_sve_<sve_fp_op>_lanevnx4sf): New patterns.
+       (@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>)
+       (@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+       (*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+       (@aarch64_sve_cvtnt<VNx8BF_ONLY:mode>): Likewise.
+       * config/aarch64/aarch64-sve2.md (@aarch64_sve2_cvtnt<mode>): Key
+       the pattern off the narrow mode instead of the wider one.
+       * config/aarch64/iterators.md (VNx8BF_ONLY): New mode iterator.
+       (UNSPEC_BFMLALB, UNSPEC_BFMLALT, UNSPEC_BFMMLA): New unspecs.
+       (sve_fp_op): Handle them.
+       (SVE_BFLOAT_TERNARY_LONG): New int iterator.
+       (SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.
+
 2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/arm_sve.h: Include arm_bf16.h.
index 9ae143c287f9ab1e1c8861d2631ea3d5bfbf8f97..9b63ea76ecd4c1cc3484d0ae6c4185db62cc9b34 100644 (file)
@@ -2544,6 +2544,16 @@ FUNCTION (svandv, reduction, (UNSPEC_ANDV))
 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
 FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
 FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
+FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
+FUNCTION (svbfdot_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
+FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
+FUNCTION (svbfmlalb_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
+FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
+FUNCTION (svbfmlalt_lane, fixed_insn_function,
+         (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
+FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
 FUNCTION (svbic, svbic_impl,)
 FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
 FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
@@ -2592,6 +2602,7 @@ FUNCTION (svcreate2, svcreate_impl, (2))
 FUNCTION (svcreate3, svcreate_impl, (3))
 FUNCTION (svcreate4, svcreate_impl, (4))
 FUNCTION (svcvt, svcvt_impl,)
+FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
 FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdot, svdot_impl,)
index 332555b34fbcd1502475cf5885509b24b09d245a..27ab05d4ef207b8c8409a0b77c768ab66a774dcf 100644 (file)
@@ -318,6 +318,18 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
 DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
+DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none)
+DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
+#undef REQUIRED_EXTENSIONS
+
 #define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
 DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
 DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
index 5c19b7d899d7e0f8204c893cc8a2b8dd6a7a4859..957ace8ed645b22825b96cebe48402de526e3f8a 100644 (file)
@@ -42,6 +42,13 @@ namespace aarch64_sve
     extern const function_base *const svasr;
     extern const function_base *const svasr_wide;
     extern const function_base *const svasrd;
+    extern const function_base *const svbfdot;
+    extern const function_base *const svbfdot_lane;
+    extern const function_base *const svbfmlalb;
+    extern const function_base *const svbfmlalb_lane;
+    extern const function_base *const svbfmlalt;
+    extern const function_base *const svbfmlalt_lane;
+    extern const function_base *const svbfmmla;
     extern const function_base *const svbic;
     extern const function_base *const svbrka;
     extern const function_base *const svbrkb;
@@ -84,6 +91,7 @@ namespace aarch64_sve
     extern const function_base *const svcreate3;
     extern const function_base *const svcreate4;
     extern const function_base *const svcvt;
+    extern const function_base *const svcvtnt;
     extern const function_base *const svdiv;
     extern const function_base *const svdivr;
     extern const function_base *const svdot;
index 1ea3caa258c70d8f37eac678474a216128ee7edc..5f8c85d213ece1c19d44946f19e8ac1dfac3fc51 100644 (file)
@@ -78,6 +78,7 @@ apply_predication (const function_instance &instance, tree return_type,
    [01]    - the element type in type suffix 0 or 1 of INSTANCE
    f<bits> - a floating-point type with the given number of bits
    f[01]   - a floating-point type with the same width as type suffix 0 or 1
+   B       - bfloat16_t
    h<elt>  - a half-sized version of <elt>
    p       - a predicate (represented as TYPE_SUFFIX_b)
    q<elt>  - a quarter-sized version of <elt>
@@ -117,6 +118,9 @@ parse_element_type (const function_instance &instance, const char *&format)
   if (ch == 'p')
     return TYPE_SUFFIX_b;
 
+  if (ch == 'B')
+    return TYPE_SUFFIX_bf16;
+
   if (ch == 'q')
     {
       type_suffix_index suffix = parse_element_type (instance, format);
@@ -921,6 +925,26 @@ struct ternary_resize2_lane_base : public overloaded_base<0>
   }
 };
 
+/* A specialization of ternary_resize2_lane_base for bfloat16 elements,
+   indexed in groups of N elements.  */
+template<unsigned int N>
+struct ternary_bfloat_lane_base
+  : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const OVERRIDE
+  {
+    return c.require_immediate_lane_index (3, N);
+  }
+};
+
 /* A specialization of ternary_resize2_lane_base for quarter-sized
    elements.  */
 template<type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
@@ -2695,6 +2719,48 @@ struct tbl_tuple_def : public overloaded_base<0>
 };
 SHAPE (tbl_tuple)
 
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t).  */
+struct ternary_bfloat_def
+  : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB", group, MODE_none);
+  }
+};
+SHAPE (ternary_bfloat)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+   where the final argument is an integer constant expression in the range
+   [0, 7].  */
+typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def;
+SHAPE (ternary_bfloat_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+   where the final argument is an integer constant expression in the range
+   [0, 3].  */
+typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def;
+SHAPE (ternary_bfloat_lanex2)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t)
+   sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t).  */
+struct ternary_bfloat_opt_n_def
+  : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const OVERRIDE
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vB,vB", group, MODE_none);
+    build_all (b, "v0,v0,vB,sB", group, MODE_n);
+  }
+};
+SHAPE (ternary_bfloat_opt_n)
+
 /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t,
                       uint64_t)
 
index 1ce09978efa25e45b2e5f19131313e9fa41ebf7b..3a19982ea2b79769266773d42997e2ecca0a02ca 100644 (file)
@@ -148,6 +148,10 @@ namespace aarch64_sve
     extern const function_shape *const store_scatter_offset;
     extern const function_shape *const store_scatter_offset_restricted;
     extern const function_shape *const tbl_tuple;
+    extern const function_shape *const ternary_bfloat;
+    extern const function_shape *const ternary_bfloat_lane;
+    extern const function_shape *const ternary_bfloat_lanex2;
+    extern const function_shape *const ternary_bfloat_opt_n;
     extern const function_shape *const ternary_intq_uintq_lane;
     extern const function_shape *const ternary_intq_uintq_opt_n;
     extern const function_shape *const ternary_lane;
index 53b16511623699eee4b30a0dd73af32c7ef80663..9e7219cfecb56495a85e40e8eacf4bcfdad36014 100644 (file)
@@ -487,7 +487,6 @@ FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
 FUNCTION (svcdot, svcdot_impl,)
 FUNCTION (svcdot_lane, svcdot_lane_impl,)
 FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
-FUNCTION (svcvtnt, CODE_FOR_MODE1 (aarch64_sve2_cvtnt),)
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
 FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
 FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
index 90e29fc0a924e133a844f7d90bfad38369000a4c..06d4a9369ffcf86308ac6c5ea4e4d7e24bd9cc0e 100644 (file)
@@ -54,7 +54,6 @@ namespace aarch64_sve
     extern const function_base *const svcdot;
     extern const function_base *const svcdot_lane;
     extern const function_base *const svcvtlt;
-    extern const function_base *const svcvtnt;
     extern const function_base *const svcvtx;
     extern const function_base *const svcvtxnt;
     extern const function_base *const sveor3;
index d4d201d4f5c519b1bd65e761f31d5354f96a4fa5..2c5543b6e6ad9abf61541ae07be1b1024473f226 100644 (file)
@@ -354,6 +354,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
   D (u32, f16), D (u32, f32), D (u32, f64), \
   D (u64, f16), D (u64, f32), D (u64, f64)
 
+/* _bf16_f32.  */
+#define TYPES_cvt_bfloat(S, D) \
+  D (bf16, f32)
+
 /* _f32_f16
    _f64_f32.  */
 #define TYPES_cvt_long(S, D) \
@@ -471,6 +475,7 @@ DEF_SVE_TYPES_ARRAY (d_unsigned);
 DEF_SVE_TYPES_ARRAY (d_integer);
 DEF_SVE_TYPES_ARRAY (d_data);
 DEF_SVE_TYPES_ARRAY (cvt);
+DEF_SVE_TYPES_ARRAY (cvt_bfloat);
 DEF_SVE_TYPES_ARRAY (cvt_long);
 DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
index abaac11d0c852de23338e22277f84535ddd4a0ba..fa3852992e1341a68b927db81f2712091dbf72ce 100644 (file)
 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
 ;; ---- [FP] Complex multiply-add
 ;; ---- [FP] Trigonometric multiply-add
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
 ;; ---- [FP] Matrix multiply-accumulate
 ;;
 ;; == Comparisons and selects
 ;; ---- [FP<-INT] Packs
 ;; ---- [FP<-INT] Unpacks
 ;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Packs (bfloat16)
 ;; ---- [FP<-FP] Unpacks
 ;; ---- [PRED<-PRED] Packs
 ;; ---- [PRED<-PRED] Unpacks
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFDOT (BF16)
+;; - BFMLALB (BF16)
+;; - BFMLALT (BF16)
+;; - BFMMLA (BF16)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
+  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+       (unspec:VNx4SF
+         [(match_operand:VNx4SF 1 "register_operand" "0, w")
+          (match_operand:VNx8BF 2 "register_operand" "w, w")
+          (match_operand:VNx8BF 3 "register_operand" "w, w")]
+         SVE_BFLOAT_TERNARY_LONG))]
+  "TARGET_SVE_BF16"
+  "@
+   <sve_fp_op>\t%0.s, %2.h, %3.h
+   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
+  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+       (unspec:VNx4SF
+         [(match_operand:VNx4SF 1 "register_operand" "0, w")
+          (match_operand:VNx8BF 2 "register_operand" "w, w")
+          (match_operand:VNx8BF 3 "register_operand" "y, y")
+          (match_operand:SI 4 "const_int_operand")]
+         SVE_BFLOAT_TERNARY_LONG_LANE))]
+  "TARGET_SVE_BF16"
+  "@
+   <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
+   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
+  [(set_attr "movprfx" "*,yes")]
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Matrix multiply-accumulate
 ;; -------------------------------------------------------------------------
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs (bfloat16)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFCVT (BF16)
+;; - BFCVTNT (BF16)
+;; -------------------------------------------------------------------------
+
+;; Predicated BFCVT.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand" "Upl")
+          (match_operand:SI 3 "aarch64_sve_gp_strictness")
+          (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
+         SVE_COND_FCVT))]
+  "TARGET_SVE_BF16"
+  "bfcvt\t%0.h, %1/m, %2.s"
+)
+
+;; Predicated BFCVT with merging.
+(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand")
+          (unspec:VNx8BF_ONLY
+            [(match_dup 1)
+             (const_int SVE_STRICT_GP)
+             (match_operand:VNx4SF_ONLY 2 "register_operand")]
+            SVE_COND_FCVT)
+          (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
+         UNSPEC_SEL))]
+  "TARGET_SVE_BF16"
+)
+
+(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
+          (unspec:VNx8BF_ONLY
+            [(match_dup 1)
+             (match_operand:SI 4 "aarch64_sve_gp_strictness")
+             (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
+            SVE_COND_FCVT)
+          (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+         UNSPEC_SEL))]
+  "TARGET_SVE_BF16"
+  "@
+   bfcvt\t%0.h, %1/m, %2.s
+   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
+   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
+  [(set_attr "movprfx" "*,yes,yes")]
+)
+
+;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; This instruction does not take MOVPRFX.
+(define_insn "@aarch64_sve_cvtnt<mode>"
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+       (unspec:VNx8BF_ONLY
+         [(match_operand:VNx4BI 2 "register_operand" "Upl")
+          (const_int SVE_STRICT_GP)
+          (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
+          (match_operand:VNx4SF 3 "register_operand" "w")]
+         UNSPEC_COND_FCVTNT))]
+  "TARGET_SVE_BF16"
+  "bfcvtnt\t%0.h, %2/m, %3.s"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Unpacks
 ;; -------------------------------------------------------------------------
index eaded5d34808297a2efe3c3fb64dede8d0c806b9..f82e60e25c79bbe18a4257807f78368116ec6d68 100644 (file)
 ;; elements, even if the inactive elements don't matter.
 ;;
 ;; These instructions do not take MOVPRFX.
-(define_insn "@aarch64_sve2_cvtnt<mode>"
-  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
-       (unspec:<VNARROW>
-         [(match_operand:<VPRED> 2 "register_operand" "Upl")
+(define_insn "@aarch64_sve_cvtnt<mode>"
+  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+       (unspec:SVE_FULL_HSF
+         [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
-          (match_operand:<VNARROW> 1 "register_operand" "0")
-          (match_operand:SVE_FULL_SDF 3 "register_operand" "w")]
+          (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
+          (match_operand:<VWIDE> 3 "register_operand" "w")]
          UNSPEC_COND_FCVTNT))]
   "TARGET_SVE2"
-  "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
 )
 
 ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
index 043e26a089826ba4cb56422f5a558156e62778f2..8f08bad3562c4cbe8acdf5891e84f89d23ea6784 100644 (file)
@@ -362,6 +362,7 @@ extern unsigned aarch64_architecture_version;
 /* BF16 instructions are enabled through +bf16.  */
 #define TARGET_BF16_FP (AARCH64_ISA_BF16)
 #define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD)
+#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16)
 
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
index d5b60e08bafe80992a8c2a08280da41be4abfeff..3e3fd9d0cd26185d92be264b27321b28f4c99e46 100644 (file)
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator VNx16QI_ONLY [VNx16QI])
 (define_mode_iterator VNx8HI_ONLY [VNx8HI])
+(define_mode_iterator VNx8BF_ONLY [VNx8BF])
 (define_mode_iterator VNx4SI_ONLY [VNx4SI])
 (define_mode_iterator VNx4SF_ONLY [VNx4SF])
 (define_mode_iterator VNx2DI_ONLY [VNx2DI])
     UNSPEC_USDOT       ; Used in aarch64-simd.md.
     UNSPEC_SUDOT       ; Used in aarch64-simd.md.
     UNSPEC_BFDOT       ; Used in aarch64-simd.md.
+    UNSPEC_BFMLALB     ; Used in aarch64-sve.md.
+    UNSPEC_BFMLALT     ; Used in aarch64-sve.md.
+    UNSPEC_BFMMLA      ; Used in aarch64-sve.md.
 ])
 
 ;; ------------------------------------------------------------------
 
 (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
 
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
+                                             UNSPEC_BFMLALB
+                                             UNSPEC_BFMLALT
+                                             UNSPEC_BFMMLA])
+
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
+                                                  UNSPEC_BFMLALB
+                                                  UNSPEC_BFMLALT])
+
 (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
                                        UNSPEC_IORV
                                        UNSPEC_SMAXV
                                  (UNSPEC_SQDMULLBT "sqdmlslbt")
                                  (UNSPEC_SQDMULLT "sqdmlslt")])
 
-(define_int_attr sve_fp_op [(UNSPEC_FRECPE "frecpe")
+(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot")
+                           (UNSPEC_BFMLALB "bfmlalb")
+                           (UNSPEC_BFMLALT "bfmlalt")
+                           (UNSPEC_BFMMLA "bfmmla")
+                           (UNSPEC_FRECPE "frecpe")
                            (UNSPEC_FRECPS "frecps")
                            (UNSPEC_RSQRTE "frsqrte")
                            (UNSPEC_RSQRTS "frsqrts")
index 5d002d9b81505ed7674b228766b1275aaf1ddb61..b23973461d983170c28669030e9040a954420212 100644 (file)
@@ -1,3 +1,24 @@
+2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * lib/target-supports.exp (check_effective_target_aarch64_asm_bf16_ok):
+       New proc.
+       * gcc.target/aarch64/sve/acle/asm/bfdot_f32.c: New test.
+       * gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/cvt_bf16.c: Likewise.
+       * gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c: Likewise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c: Likewise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c:
+       Likewise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c:
+       Likewise.
+       * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c:
+       Likewise.
+
 2020-01-31  Richard Sandiford  <richard.sandiford@arm.com>
 
        * g++.target/aarch64/sve/acle/general-c++/mangle_1.C: Test mangling
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c
new file mode 100644 (file)
index 0000000..376622d
--- /dev/null
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_f32_tied1:
+**     bfdot   z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_f32 (z0, z4, z5),
+            z0 = svbfdot (z0, z4, z5))
+
+/*
+** bfdot_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_f32 (z4, z0, z1),
+                z0_res = svbfdot (z4, z0, z1))
+
+/*
+** bfdot_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_f32 (z4, z1, z0),
+                z0_res = svbfdot (z4, z1, z0))
+
+/*
+** bfdot_f32_untied:
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_f32 (z1, z4, z5),
+            z0 = svbfdot (z1, z4, z5))
+
+/*
+** bfdot_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfdot   z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfdot_n_f32 (z0, z4, d7),
+             z0 = svbfdot (z0, z4, d7))
+
+/*
+** bfdot_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfdot_n_f32 (z1, z4, d7),
+             z0 = svbfdot (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c
new file mode 100644 (file)
index 0000000..0f624fe
--- /dev/null
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_lane_0_f32_tied1:
+**     bfdot   z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfdot_lane (z0, z4, z5, 0))
+
+/*
+** bfdot_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfdot_lane (z4, z0, z1, 0))
+
+/*
+** bfdot_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfdot   z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfdot_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfdot_lane (z4, z1, z0, 0))
+
+/*
+** bfdot_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfdot   z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfdot_lane (z1, z4, z5, 0))
+
+/*
+** bfdot_lane_1_f32:
+**     bfdot   z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfdot_lane (z0, z4, z5, 1))
+
+/*
+** bfdot_lane_3_f32:
+**     bfdot   z0\.s, z4\.h, z5\.h\[3\]
+**     ret
+*/
+TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfdot_lane_f32 (z0, z4, z5, 3),
+            z0 = svbfdot_lane (z0, z4, z5, 3))
+
+/*
+** bfdot_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfdot   z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfdot_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfdot_lane (z0, z1, z8, 1))
+
+/*
+** bfdot_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfdot   z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfdot_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfdot_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c
new file mode 100644 (file)
index 0000000..0f81011
--- /dev/null
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_f32_tied1:
+**     bfmlalb z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_f32 (z0, z4, z5),
+            z0 = svbfmlalb (z0, z4, z5))
+
+/*
+** bfmlalb_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_f32 (z4, z0, z1),
+                z0_res = svbfmlalb (z4, z0, z1))
+
+/*
+** bfmlalb_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_f32 (z4, z1, z0),
+                z0_res = svbfmlalb (z4, z1, z0))
+
+/*
+** bfmlalb_f32_untied:
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_f32 (z1, z4, z5),
+            z0 = svbfmlalb (z1, z4, z5))
+
+/*
+** bfmlalb_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfmlalb z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalb_n_f32 (z0, z4, d7),
+             z0 = svbfmlalb (z0, z4, d7))
+
+/*
+** bfmlalb_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalb_n_f32 (z1, z4, d7),
+             z0 = svbfmlalb (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c
new file mode 100644 (file)
index 0000000..b0ec088
--- /dev/null
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_lane_0_f32_tied1:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfmlalb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfmlalb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalb z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfmlalb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalb_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfmlalb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalb_lane_1_f32:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfmlalb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalb_lane_7_f32:
+**     bfmlalb z0\.s, z4\.h, z5\.h\[7\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7),
+            z0 = svbfmlalb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalb_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfmlalb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalb_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfmlalb_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c
new file mode 100644 (file)
index 0000000..2a583fa
--- /dev/null
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_f32_tied1:
+**     bfmlalt z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_f32 (z0, z4, z5),
+            z0 = svbfmlalt (z0, z4, z5))
+
+/*
+** bfmlalt_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_f32 (z4, z0, z1),
+                z0_res = svbfmlalt (z4, z0, z1))
+
+/*
+** bfmlalt_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_f32 (z4, z1, z0),
+                z0_res = svbfmlalt (z4, z1, z0))
+
+/*
+** bfmlalt_f32_untied:
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_f32 (z1, z4, z5),
+            z0 = svbfmlalt (z1, z4, z5))
+
+/*
+** bfmlalt_h7_f32_tied1:
+**     mov     (z[0-9]+\.h), h7
+**     bfmlalt z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalt_n_f32 (z0, z4, d7),
+             z0 = svbfmlalt (z0, z4, d7))
+
+/*
+** bfmlalt_h7_f32_untied:
+**     mov     (z[0-9]+\.h), h7
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, \1
+**     ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+             z0 = svbfmlalt_n_f32 (z1, z4, d7),
+             z0 = svbfmlalt (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c
new file mode 100644 (file)
index 0000000..3af3997
--- /dev/null
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_lane_0_f32_tied1:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0),
+            z0 = svbfmlalt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, \1\.h, z1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0),
+                z0_res = svbfmlalt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmlalt z0\.s, z1\.h, \1\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0),
+                z0_res = svbfmlalt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalt_lane_0_f32_untied:
+**     movprfx z0, z1
+**     bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0),
+            z0 = svbfmlalt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalt_lane_1_f32:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[1\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1),
+            z0 = svbfmlalt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalt_lane_7_f32:
+**     bfmlalt z0\.s, z4\.h, z5\.h\[7\]
+**     ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t,
+            z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7),
+            z0 = svbfmlalt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalt_lane_z8_f32:
+**     str     d8, \[sp, -16\]!
+**     mov     (z[0-7])\.d, z8\.d
+**     bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+**     ldr     d8, \[sp\], 16
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+                   z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1),
+                   z0 = svbfmlalt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalt_lane_z16_f32:
+**     mov     (z[0-7])\.d, z16\.d
+**     bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+**     ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+                   z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1),
+                   z0 = svbfmlalt_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c
new file mode 100644 (file)
index 0000000..b1d98fb
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmmla_f32_tied1:
+**     bfmmla  z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t,
+            z0 = svbfmmla_f32 (z0, z4, z5),
+            z0 = svbfmmla (z0, z4, z5))
+
+/*
+** bfmmla_f32_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmmla  z0\.s, \1\.h, z1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmmla_f32 (z4, z0, z1),
+                z0_res = svbfmmla (z4, z0, z1))
+
+/*
+** bfmmla_f32_tied3:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfmmla  z0\.s, z1\.h, \1\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t,
+                z0_res = svbfmmla_f32 (z4, z1, z0),
+                z0_res = svbfmmla (z4, z1, z0))
+
+/*
+** bfmmla_f32_untied:
+**     movprfx z0, z1
+**     bfmmla  z0\.s, z4\.h, z5\.h
+**     ret
+*/
+TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t,
+            z0 = svbfmmla_f32 (z1, z4, z5),
+            z0 = svbfmmla (z1, z4, z5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c
new file mode 100644 (file)
index 0000000..52baa1f
--- /dev/null
@@ -0,0 +1,96 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvt_bf16_f32_m_tied1:
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_m (z0, p0, z4),
+            z0 = svcvt_bf16_m (z0, p0, z4))
+
+/*
+** cvt_bf16_f32_m_tied2:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0, z4
+**     bfcvt   z0\.h, p0/m, \1\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_m (z4, p0, z0),
+                z0_res = svcvt_bf16_m (z4, p0, z0))
+
+/*
+** cvt_bf16_f32_m_untied:
+**     movprfx z0, z1
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_m (z1, p0, z4),
+            z0 = svcvt_bf16_m (z1, p0, z4))
+
+/*
+** cvt_bf16_f32_z_tied1:
+**     mov     (z[0-9]+)\.d, z0\.d
+**     movprfx z0\.s, p0/z, \1\.s
+**     bfcvt   z0\.h, p0/m, \1\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_z (p0, z0),
+                z0_res = svcvt_bf16_z (p0, z0))
+
+/*
+** cvt_bf16_f32_z_untied:
+**     movprfx z0\.s, p0/z, z4\.s
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_z (p0, z4),
+            z0 = svcvt_bf16_z (p0, z4))
+
+/*
+** cvt_bf16_f32_x_tied1:
+**     bfcvt   z0\.h, p0/m, z0\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_x (p0, z0),
+                z0_res = svcvt_bf16_x (p0, z0))
+
+/*
+** cvt_bf16_f32_x_untied:
+**     bfcvt   z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_x (p0, z4),
+            z0 = svcvt_bf16_x (p0, z4))
+
+/*
+** ptrue_cvt_bf16_f32_x_tied1:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+                z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0),
+                z0_res = svcvt_bf16_x (svptrue_b32 (), z0))
+
+/*
+** ptrue_cvt_bf16_f32_x_untied:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4),
+            z0 = svcvt_bf16_x (svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c
new file mode 100644 (file)
index 0000000..54614c9
--- /dev/null
@@ -0,0 +1,90 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok }  */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvtnt_bf16_f32_m_tied1:
+**     bfcvtnt z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_m (z0, p0, z4),
+            z0 = svcvtnt_bf16_m (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_m (z4, p0, z0),
+                z0_res = svcvtnt_bf16_m (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_m_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     bfcvtnt z0\.h, p0/m, z4\.s
+** |
+**     bfcvtnt z1\.h, p0/m, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_m (z1, p0, z4),
+            z0 = svcvtnt_bf16_m (z1, p0, z4))
+
+/*
+** cvtnt_bf16_f32_x_tied1:
+**     bfcvtnt z0\.h, p0/m, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z0, p0, z4),
+            z0 = svcvtnt_bf16_x (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_x (z4, p0, z0),
+                z0_res = svcvtnt_bf16_x (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_x_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     bfcvtnt z0\.h, p0/m, z4\.s
+** |
+**     bfcvtnt z1\.h, p0/m, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z1, p0, z4),
+            z0 = svcvtnt_bf16_x (z1, p0, z4))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_tied1:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4),
+            z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4))
+
+/* Bad RA choice: no preferred output sequence.  */
+TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+                z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0),
+                z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_untied:
+**     ...
+**     ptrue   p[0-9]+\.b[^\n]*
+**     ...
+**     ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4),
+            z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
new file mode 100644 (file)
index 0000000..a923332
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+  svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */
+  svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */
+  svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */
+  svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */
+  svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */
+  svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */
+  svbfmmla (f64, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */
+  svbfmmla (f32, bf16, bf16);
+  svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+  svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
new file mode 100644 (file)
index 0000000..23f027f
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+  svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */
+  svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */
+  svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */
+  svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */
+  svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */
+  svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */
+  svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */
+  svbfmlalb_lane (f32, bf16, bf16, 0);
+  svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+  svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+  svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+
+  svbfmlalb_lane (f32, bf16, bf16, 0);
+  svbfmlalb_lane (f32, bf16, bf16, 7);
+  svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+  svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
new file mode 100644 (file)
index 0000000..4755ca7
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+  svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */
+  svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */
+  svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */
+  svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */
+  svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */
+  svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */
+  svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */
+  svbfdot_lane (f32, bf16, bf16, 0);
+  svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+  svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+  svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+
+  svbfdot_lane (f32, bf16, bf16, 0);
+  svbfdot_lane (f32, bf16, bf16, 3);
+  svbfdot_lane (f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+  svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
new file mode 100644 (file)
index 0000000..2d09a8e
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+  svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */
+  svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */
+  svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */
+  svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */
+  svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */
+  svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */
+  svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */
+  svbfdot (f32, bf16, bf16);
+  svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+  svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */
+  svbfdot (f32, bf16, bf);
+}
index 6c06faf580a0d5950a90ec8bbbef30e3f7ae37b7..5377d7b11cb8cbdb1f9e958ef315e564b9c3be9d 100644 (file)
@@ -8996,7 +8996,7 @@ proc check_effective_target_aarch64_tiny { } {
 # various architecture extensions via the .arch_extension pseudo-op.
 
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
-                         "i8mm" "f32mm" "f64mm" } {
+                         "i8mm" "f32mm" "f64mm" "bf16" } {
     eval [string map [list FUNC $aarch64_ext] {
        proc check_effective_target_aarch64_asm_FUNC_ok { } {
          if { [istarget aarch64*-*-*] } {