From: Claudio Bantaloukas Date: Wed, 24 Dec 2025 11:41:26 +0000 (+0000) Subject: aarch64: add 8-bit floating-point sum of outer products and accumulate X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68fbdf216f97ffca8aad4020784a7e91f92af57a;p=thirdparty%2Fgcc.git aarch64: add 8-bit floating-point sum of outer products and accumulate This patch adds support for FMOPA (widening, 2-way, FP8 to FP16) when sme-f8f16 is enabled using svmopa_za16[_mf8]_m_fpm and for FMOPA (widening, 4-way) when sme-f8f32 is enabled using svmopa_za32[_mf8]_m_fpm. Asm tests for the new intrinsics are added, similar to those for existing mopa_za16 intrinsics. Tests for the binary_za_m shape are added. gcc: * config/aarch64/aarch64-sme.md (@aarch64_sme_): Add new define_insn. * config/aarch64/aarch64-sve-builtins-shapes.cc (struct binary_za_m_base): Support fpm argument. * config/aarch64/aarch64-sve-builtins-sme.cc (svmopa_za): Extend for fp8. * config/aarch64/aarch64-sve-builtins-sme.def (svmopa): Add new DEF_SME_ZA_FUNCTION_GS_FPM entries. gcc/testsuite: * gcc.target/aarch64/sme/acle-asm/test_sme_acle.h (TEST_UNIFORM_ZA): Add fpm0 parameter. * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Add tests for variants accepting fpm. * gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c: New test. * gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c: Likewise. --- diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index e93f83b3983..7201e0f0ec3 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -2370,6 +2370,8 @@ ;; - BFMOPS (SME_B16B16) ;; - FMOPA ;; - FMOPS +;; - FMOPA (SME_F8F16) +;; - FMOPA (SME_F8F32) ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sme_" @@ -2402,6 +2404,22 @@ "\tza%0., %1/m, %2/m, %3., %4." 
) +(define_insn "@aarch64_sme_" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand: 1 "register_operand" "Upl") + (match_operand: 2 "register_operand" "Upl") + (match_operand:VNx16QI_ONLY 3 "register_operand" "w") + (match_operand:VNx16QI_ONLY 4 "register_operand" "w") + (reg:DI FPM_REGNUM)] + SME_FP_MOP))] + "TARGET_STREAMING" + "\tza%0., %1/m, %2/m, %3.b, %4.b" +) + ;; ========================================================================= ;; == Table lookup ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 59f313d08f2..ea4be3733c2 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -692,7 +692,7 @@ struct binary_za_m_base : public overloaded_base<1> resolve (function_resolver &r) const override { type_suffix_index type; - if (!r.check_num_arguments (5) + if (!r.check_num_arguments (r.fpm_mode == FPM_set ? 
6: 5) || !r.require_integer_immediate (0) || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc index 43ef05c673a..20a6ebc4059 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc @@ -651,7 +651,7 @@ FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS, UNSPEC_SME_FMLS)) FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA, - UNSPEC_SME_FMOPA)) + UNSPEC_SME_FMOPA, UNSPEC_SME_FMOPA)) FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS, UNSPEC_SME_FMOPS)) FUNCTION (svread_za, svread_za_impl,) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index f9ad6837f44..6306ee33a14 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -270,6 +270,7 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, binary_za_slice_lane, za_h_mf8, vg2, none, set) DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, vg2, none, set) DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, vg1x24, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_h_mf8, none, za_m, set) #undef REQUIRED_EXTENSIONS #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F32) @@ -277,6 +278,7 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, binary_za_slice_lane, za_s_mf8, vg4, none, set) DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, vg4, none, set) DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, vg1x24, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_s_mf8, none, za_m, set) #undef REQUIRED_EXTENSIONS #undef DEF_SME_ZA_FUNCTION diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h index aaadab2f773..75e3413768e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h @@ -46,7 +46,7 @@ #define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2) \ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \ - svbool_t p1)) \ + svbool_t p1, fpm_t fpm0)) \ { \ INVOKE (CODE1, CODE2); \ } diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c new file mode 100644 index 00000000000..e88b7a4814c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c @@ -0,0 +1,36 @@ +/* { dg-do assemble { target aarch64_asm_sme-f8f16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f8f16" +/* +** mopa_za16_mf8_0_p0_p1_z0_z1: +** msr fpmr, x0 +** fmopa za0\.h, p0/m, p1/m, z0\.b, z1\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_mf8_0_p0_p1_z0_z1, svmfloat8_t, + svmopa_za16_mf8_m_fpm (0, p0, p1, z0, z1, fpm0), + svmopa_za16_m_fpm (0, p0, p1, z0, z1, fpm0)) + +/* +** mopa_za16_mf8_0_p1_p0_z1_z0: +** msr fpmr, x0 +** fmopa za0\.h, p1/m, p0/m, z1\.b, z0\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_mf8_0_p1_p0_z1_z0, svmfloat8_t, + svmopa_za16_mf8_m_fpm (0, p1, p0, z1, z0, fpm0), + svmopa_za16_m_fpm (0, p1, p0, z1, z0, fpm0)) + +/* +** mopa_za16_mf8_1_p0_p1_z0_z1: +** msr fpmr, x0 +** fmopa za1\.h, p0/m, p1/m, z0\.b, z1\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_mf8_1_p0_p1_z0_z1, svmfloat8_t, + svmopa_za16_mf8_m_fpm (1, p0, p1, z0, z1, fpm0), + svmopa_za16_m_fpm (1, p0, p1, z0, z1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c new file mode 100644 index 00000000000..74a665fea6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c @@ -0,0 +1,36 @@ +/* { dg-do assemble { target aarch64_asm_sme-f8f32_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f8f32" +/* +** mopa_za32_mf8_0_p0_p1_z0_z1: +** msr fpmr, x0 +** fmopa za0\.s, p0/m, p1/m, z0\.b, z1\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_mf8_0_p0_p1_z0_z1, svmfloat8_t, + svmopa_za32_mf8_m_fpm (0, p0, p1, z0, z1, fpm0), + svmopa_za32_m_fpm (0, p0, p1, z0, z1, fpm0)) + +/* +** mopa_za32_mf8_0_p1_p0_z1_z0: +** msr fpmr, x0 +** fmopa za0\.s, p1/m, p0/m, z1\.b, z0\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_mf8_0_p1_p0_z1_z0, svmfloat8_t, + svmopa_za32_mf8_m_fpm (0, p1, p0, z1, z0, fpm0), + svmopa_za32_m_fpm (0, p1, p0, z1, z0, fpm0)) + +/* +** mopa_za32_mf8_1_p0_p1_z0_z1: +** msr fpmr, x0 +** fmopa za1\.s, p0/m, p1/m, z0\.b, z1\.b +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_mf8_1_p0_p1_z0_z1, svmfloat8_t, + svmopa_za32_mf8_m_fpm (1, p0, p1, z0, z1, fpm0), + svmopa_za32_m_fpm (1, p0, p1, z0, z1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c index 44c3e48e916..5f013bd4194 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c @@ -46,3 +46,17 @@ f4 (svbool_t pg, svint16_t s16) __arm_streaming __arm_inout("za") svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */ svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } 
*/ } + +#pragma GCC target ("arch=armv9-a+sme-f8f16+sme-f8f32") + +void +f5 (svbool_t pg, svmfloat8_t mf8, fpm_t fpm) __arm_streaming __arm_inout("za") +{ + svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8); /* { dg-error {too few arguments to function 'svmopa_za16_mf8_m_fpm'} } */ + svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8, fpm); + svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8, fpm, fpm); /* { dg-error {too many arguments to function 'svmopa_za16_mf8_m_fpm'; expected 6, have 7} } */ + + svmopa_za16_mf8_m_fpm(-1, pg, pg, mf8, mf8, fpm); /* { dg-error {passing -1 to argument 1 of 'svmopa_za16_mf8_m_fpm', which expects a value in the range \[0, 1\]} } */ + svmopa_za16_mf8_m_fpm(2, pg, pg, mf8, mf8, fpm); /* { dg-error {passing 2 to argument 1 of 'svmopa_za16_mf8_m_fpm', which expects a value in the range \[0, 1\]} } */ + svmopa_za32_mf8_m_fpm(4, pg, pg, mf8, mf8, fpm); /* { dg-error {passing 4 to argument 1 of 'svmopa_za32_mf8_m_fpm', which expects a value in the range \[0, 3\]} } */ +}