From: Claudio Bantaloukas
Date: Wed, 24 Dec 2025 11:41:25 +0000 (+0000)
Subject: aarch64: extend sme intrinsics to mfp8
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9674a37d77600176169a1ec081b510653f37b85e;p=thirdparty%2Fgcc.git

aarch64: extend sme intrinsics to mfp8

This patch extends the following intrinsics to support the svmfloat8_t
type and adds tests based on the equivalent svuint8_t ones.

SME:
- svread_hor_za8[_mf8]_m, svread_hor_za128[_mf8]_m and the corresponding
  _ver forms.
- svwrite_hor_za8[_mf8]_m, svwrite_hor_za128[_mf8]_m and the corresponding
  _ver forms.

SME2:
- svread_hor_za8_mf8_vg2, svread_hor_za8_mf8_vg4 and the corresponding
  _ver forms.
- svwrite_hor_za8[_mf8]_vg2, svwrite_hor_za8[_mf8]_vg4 and the
  corresponding _ver forms.
- svread_za8[_mf8]_vg1x2, svread_za8[_mf8]_vg1x4.
- svwrite_za8[_mf8]_vg1x2, svwrite_za8[_mf8]_vg1x4.
- svsel[_mf8_x2], svsel[_mf8_x4].
- svzip[_mf8_x2], svzip[_mf8_x4].
- svzipq[_mf8_x2], svzipq[_mf8_x4].
- svuzp[_mf8_x2], svuzp[_mf8_x4].
- svuzpq[_mf8_x2], svuzpq[_mf8_x4].
- svld1[_mf8]_x2, svld1[_mf8]_x4.
- svld1_vnum[_mf8]_x2, svld1_vnum[_mf8]_x4.

SVE2.1/SME2:
- svldnt1[_mf8]_x2, svldnt1[_mf8]_x4.
- svldnt1_vnum[_mf8]_x2, svldnt1_vnum[_mf8]_x4.
- svrevd[_mf8]_m, svrevd[_mf8]_z, svrevd[_mf8]_x.
- svst1[_mf8_x2], svst1[_mf8_x4].
- svst1_vnum[_mf8_x2], svst1_vnum[_mf8_x4].
- svstnt1[_mf8_x2], svstnt1[_mf8_x4].
- svstnt1_vnum[_mf8_x2], svstnt1_vnum[_mf8_x4].

SME2.1:
- svreadz_hor_za8_mf8, svreadz_hor_za8_mf8_vg2, svreadz_hor_za8_mf8_vg4
  and the corresponding _ver forms.
- svreadz_hor_za128_mf8, svreadz_ver_za128_mf8.
- svreadz_za8_mf8_vg1x2, svreadz_za8_mf8_vg1x4.

This change follows ACLE 2024Q4.

gcc/
	* config/aarch64/aarch64-sve-builtins.cc (TYPES_za_bhsd_data): Add
	the D (za8, mf8) combination to za_bhsd_data.

gcc/testsuite/
	* gcc.target/aarch64/sme/acle-asm/revd_mf8.c: Added test file.
	* gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/read_hor_za128.c: Added mf8 tests.
	* gcc.target/aarch64/sme/acle-asm/read_hor_za8.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/read_ver_za128.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/read_ver_za8.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/write_hor_za128.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/write_hor_za8.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/write_ver_za128.c: Likewise.
	* gcc.target/aarch64/sme/acle-asm/write_ver_za8.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c: Likewise.
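As a minimal usage sketch of what the extension enables (illustrative only,
not part of the commit: the wrapper function name is invented, but the
intrinsic is one exercised by the new tests, and the state attributes follow
ACLE):

#include <arm_sme.h>

/* Read horizontal slice SLICE of ZA tile 0 as mfloat8 data, taking
   inactive elements from FALLBACK under predicate PG.  Before this
   patch only the _s8/_u8 forms of svread_hor_za8 were available.  */
svmfloat8_t
read_row_mf8 (svmfloat8_t fallback, svbool_t pg, uint32_t slice)
  __arm_streaming __arm_in ("za")
{
  return svread_hor_za8_mf8_m (fallback, pg, 0, slice);
}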
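The first hunk below is a one-line addition to a type-list macro in
aarch64-sve-builtins.cc.  As a simplified sketch of the X-macro idiom at
work (the macro names here are invented for illustration, not GCC's actual
definitions), each D (group, type) pair is stamped out once per consumer of
the list, so adding D (za8, mf8) mechanically instantiates the _mf8
variants alongside the existing _s8/_u8 ones:

/* Hypothetical, simplified version of the idiom -- not GCC's code.  */
#define ZA8_PAIRS(D) D (za8, mf8) D (za8, s8) D (za8, u8)
#define DECLARE_HANDLER(group, type) void handle_##group##_##type (void);
ZA8_PAIRS (DECLARE_HANDLER)
/* Expands to declarations of handle_za8_mf8, handle_za8_s8 and
   handle_za8_u8; adding one D pair adds one declaration everywhere
   ZA8_PAIRS is consumed.  */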
---

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index dbd80cab627..e8eeedb4d36 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -640,7 +640,7 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_d_za(S, D) \
   S (za64)
-/* { _za8 } x { _s8 _u8 }
+/* { _za8 } x { _mf8 _s8 _u8 }
    { _za16 } x { _bf16 _f16 _s16 _u16 }
@@ -648,7 +648,7 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
    { _za64 } x { _f64 _s64 _u64 }.
*/ #define TYPES_za_bhsd_data(S, D) \ - D (za8, s8), D (za8, u8), \ + D (za8, mf8), D (za8, s8), D (za8, u8), \ D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \ D (za32, f32), D (za32, s32), D (za32, u32), \ D (za64, f64), D (za64, s64), D (za64, u64) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c index c8eef3b16fd..fedefe5b824 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c @@ -103,6 +103,16 @@ TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, z0 = svread_hor_za128_u8_m (z0, p0, 0, w0), z0 = svread_hor_za128_m (z0, p0, 0, w0)) +/* +** read_za128_mf8_0_w0_tied: +** mov (w1[2-5]), w0 +** mova z0\.q, p0/m, za0h\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t, + z0 = svread_hor_za128_mf8_m (z0, p0, 0, w0), + z0 = svread_hor_za128_m (z0, p0, 0, w0)) + /* ** read_za128_u8_0_w0_untied: ** ( @@ -124,6 +134,27 @@ TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, z0 = svread_hor_za128_u8_m (z1, p0, 0, w0), z0 = svread_hor_za128_m (z1, p0, 0, w0)) +/* +** read_za128_mf8_0_w0_untied: +** ( +** mov (w1[2-5]), w0 +** mov z0\.d, z1\.d +** mova z0\.q, p0/m, za0h\.q\[\1, 0\] +** | +** mov z0\.d, z1\.d +** mov (w1[2-5]), w0 +** mova z0\.q, p0/m, za0h\.q\[\2, 0\] +** | +** mov (w1[2-5]), w0 +** mova z1\.q, p0/m, za0h\.q\[\3, 0\] +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t, + z0 = svread_hor_za128_mf8_m (z1, p0, 0, w0), + z0 = svread_hor_za128_m (z1, p0, 0, w0)) + /* ** read_za128_s16_0_w0_tied: ** mov (w1[2-5]), w0 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c index 0ad5a953f6b..7c04ef30fd0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c @@ -103,6 +103,16 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, z0 = svread_hor_za8_u8_m (z0, p0, 0, w0), z0 = svread_hor_za8_m (z0, p0, 0, w0)) +/* +** read_za8_mf8_0_w0_tied: +** mov (w1[2-5]), w0 +** mova z0\.b, p0/m, za0h\.b\[\1, 0\] +** ret +*/ +TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t, + z0 = svread_hor_za8_mf8_m (z0, p0, 0, w0), + z0 = svread_hor_za8_m (z0, p0, 0, w0)) + /* ** read_za8_u8_0_w0_untied: ** ( @@ -123,3 +133,24 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, z0 = svread_hor_za8_u8_m (z1, p0, 0, w0), z0 = svread_hor_za8_m (z1, p0, 0, w0)) + +/* +** read_za8_mf8_0_w0_untied: +** ( +** mov (w1[2-5]), w0 +** mov z0\.d, z1\.d +** mova z0\.b, p0/m, za0h\.b\[\1, 0\] +** | +** mov z0\.d, z1\.d +** mov (w1[2-5]), w0 +** mova z0\.b, p0/m, za0h\.b\[\2, 0\] +** | +** mov (w1[2-5]), w0 +** mova z1\.b, p0/m, za0h\.b\[\3, 0\] +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t, + z0 = svread_hor_za8_mf8_m (z1, p0, 0, w0), + z0 = svread_hor_za8_m (z1, p0, 0, w0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c index 93d5d60ea57..c4214d19e5d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c @@ -103,6 +103,16 @@ TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, z0 = svread_ver_za128_u8_m (z0, p0, 0, w0), z0 = 
svread_ver_za128_m (z0, p0, 0, w0)) +/* +** read_za128_mf8_0_w0_tied: +** mov (w1[2-5]), w0 +** mova z0\.q, p0/m, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t, + z0 = svread_ver_za128_mf8_m (z0, p0, 0, w0), + z0 = svread_ver_za128_m (z0, p0, 0, w0)) + /* ** read_za128_u8_0_w0_untied: ** ( @@ -124,6 +134,27 @@ TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, z0 = svread_ver_za128_u8_m (z1, p0, 0, w0), z0 = svread_ver_za128_m (z1, p0, 0, w0)) +/* +** read_za128_mf8_0_w0_untied: +** ( +** mov (w1[2-5]), w0 +** mov z0\.d, z1\.d +** mova z0\.q, p0/m, za0v\.q\[\1, 0\] +** | +** mov z0\.d, z1\.d +** mov (w1[2-5]), w0 +** mova z0\.q, p0/m, za0v\.q\[\2, 0\] +** | +** mov (w1[2-5]), w0 +** mova z1\.q, p0/m, za0v\.q\[\3, 0\] +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t, + z0 = svread_ver_za128_mf8_m (z1, p0, 0, w0), + z0 = svread_ver_za128_m (z1, p0, 0, w0)) + /* ** read_za128_s16_0_w0_tied: ** mov (w1[2-5]), w0 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c index 87564d1fa68..3859b2351fb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c @@ -103,6 +103,16 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, z0 = svread_ver_za8_u8_m (z0, p0, 0, w0), z0 = svread_ver_za8_m (z0, p0, 0, w0)) +/* +** read_za8_mf8_0_w0_tied: +** mov (w1[2-5]), w0 +** mova z0\.b, p0/m, za0v\.b\[\1, 0\] +** ret +*/ +TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t, + z0 = svread_ver_za8_mf8_m (z0, p0, 0, w0), + z0 = svread_ver_za8_m (z0, p0, 0, w0)) + /* ** read_za8_u8_0_w0_untied: ** ( @@ -123,3 +133,24 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, z0 = svread_ver_za8_u8_m (z1, p0, 0, w0), z0 = svread_ver_za8_m (z1, p0, 0, w0)) + +/* +** read_za8_mf8_0_w0_untied: +** ( +** mov (w1[2-5]), w0 +** mov z0\.d, z1\.d +** mova z0\.b, p0/m, za0v\.b\[\1, 0\] +** | +** mov z0\.d, z1\.d +** mov (w1[2-5]), w0 +** mova z0\.b, p0/m, za0v\.b\[\2, 0\] +** | +** mov (w1[2-5]), w0 +** mova z1\.b, p0/m, za0v\.b\[\3, 0\] +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t, + z0 = svread_ver_za8_mf8_m (z1, p0, 0, w0), + z0 = svread_ver_za8_m (z1, p0, 0, w0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c new file mode 100644 index 00000000000..611714b539b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_mf8_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t, + z0 = svrevd_mf8_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_mf8_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t, + z0 = svrevd_mf8_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_mf8_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t, + z0 = svrevd_mf8_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_mf8_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t, + z0 = svrevd_mf8_m (z2, p0, z1), + z0 = svrevd_m 
(z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. */ +TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t, + z0 = svrevd_mf8_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_mf8_z_untied: +** movi? [vdz]0\.?(?:[0-9]*[bhsd])?, #?0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t, + z0 = svrevd_mf8_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_mf8_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t, + z0 = svrevd_mf8_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_mf8_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t, + z0 = svrevd_mf8_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c index 119a2535e99..09447b35619 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c @@ -92,6 +92,16 @@ TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, svwrite_hor_za128_u8_m (0, w0, p0, z0), svwrite_hor_za128_m (0, w0, p0, z0)) +/* +** write_za128_mf8_0_w0_z0: +** mov (w1[2-5]), w0 +** mova za0h\.q\[\1, 0\], p0/m, z0\.q +** ret +*/ +TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t, + svwrite_hor_za128_mf8_m (0, w0, p0, z0), + svwrite_hor_za128_m (0, w0, p0, z0)) + /* ** write_za128_s16_0_w0_z0: ** mov (w1[2-5]), w0 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c index 683e1a64ab3..6529f9597fc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c @@ -91,3 +91,13 @@ TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, svwrite_hor_za8_u8_m (0, w0, p0, z0), svwrite_hor_za8_m (0, w0, p0, z0)) + +/* +** write_za8_mf8_0_w0_z0: +** mov (w1[2-5]), w0 +** mova za0h\.b\[\1, 0\], p0/m, z0\.b +** ret +*/ +TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t, + svwrite_hor_za8_mf8_m (0, w0, p0, z0), + svwrite_hor_za8_m (0, w0, p0, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c index 9622e99dde1..6c0d334c3dc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c @@ -92,6 +92,16 @@ TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, svwrite_ver_za128_u8_m (0, w0, p0, z0), svwrite_ver_za128_m (0, w0, p0, z0)) +/* +** write_za128_mf8_0_w0_z0: +** mov (w1[2-5]), w0 +** mova za0v\.q\[\1, 0\], p0/m, z0\.q +** ret +*/ +TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t, + svwrite_ver_za128_mf8_m (0, w0, p0, z0), + svwrite_ver_za128_m (0, w0, p0, z0)) + /* ** write_za128_s16_0_w0_z0: ** mov (w1[2-5]), w0 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c index dd61828219c..0e7cda809f2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c @@ -91,3 +91,13 @@ TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, svwrite_ver_za8_u8_m (0, w0, p0, z0), svwrite_ver_za8_m (0, w0, p0, z0)) + +/* +** 
write_za8_mf8_0_w0_z0: +** mov (w1[2-5]), w0 +** mova za0v\.b\[\1, 0\], p0/m, z0\.b +** ret +*/ +TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t, + svwrite_ver_za8_mf8_m (0, w0, p0, z0), + svwrite_ver_za8_m (0, w0, p0, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c new file mode 100644 index 00000000000..6891c5c009a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_mf8_base: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_index: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()), + z0 = svld1_x2 (pn8, x0 + svcntb ())) + +/* +** ld1_mf8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2), + z0 = svld1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ld1_mf8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14), + z0 = svld1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16), + z0 = svld1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_mf8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()), + z0 = svld1_x2 (pn8, x0 - svcntb ())) + +/* +** ld1_mf8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2), + z0 = svld1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ld1_mf8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16), + z0 = svld1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ld1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18), + z0 = svld1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ld1_mf8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t, + z17 = svld1_mf8_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_z22: +** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t, + z22 = svld1_mf8_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_z28: +** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t, + z28 = svld1_mf8_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_mf8_pn15: +** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_mf8_0: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_mf8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_mf8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_mf8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_mf8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_mf8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c new file mode 100644 index 00000000000..a95a33e6665 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_mf8_base: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_mf8_index: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()), + z0 = svld1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2), + z0 = svld1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_mf8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3), + z0 = svld1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ld1_mf8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4), + z0 = svld1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ld1_mf8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28), + z0 = svld1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ld1_mf8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32), + z0 = svld1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()), + z0 = svld1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2), + z0 = svld1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3), + z0 = svld1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ld1_mf8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4), + z0 = svld1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ld1_mf8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32), + z0 = svld1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ld1_mf8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36), + z0 = svld1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ld1_mf8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t, + z17 = svld1_mf8_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_mf8_z22: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t, + z22 = svld1_mf8_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_mf8_z28: +** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t, + z28 = svld1_mf8_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b 
{z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_mf8_pn15: +** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_mf8_0: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_mf8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_mf8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_mf8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_mf8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_mf8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_mf8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_mf8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c new file mode 100644 index 00000000000..1855dd115c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_mf8_base: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_index: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()), + z0 = svldnt1_x2 (pn8, x0 + svcntb ())) + +/* +** ldnt1_mf8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ldnt1_mf8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_mf8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()), + z0 = svldnt1_x2 (pn8, x0 - svcntb ())) + +/* +** ldnt1_mf8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ldnt1_mf8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ldnt1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ldnt1_mf8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t, + z17 = svldnt1_mf8_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_z22: +** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t, + z22 = svldnt1_mf8_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_z28: +** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t, + z28 = svldnt1_mf8_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_mf8_pn15: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_mf8_0: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_mf8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_mf8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_mf8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_mf8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_mf8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c new file mode 100644 index 00000000000..0fad26f4616 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_mf8_base: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_mf8_index: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()), + z0 = svldnt1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_mf8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ldnt1_mf8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ldnt1_mf8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ldnt1_mf8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()), + z0 = svldnt1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ldnt1_mf8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ldnt1_mf8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ldnt1_mf8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ldnt1_mf8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t, + z17 = svldnt1_mf8_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_mf8_z22: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t, + z22 = svldnt1_mf8_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_mf8_z28: +** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t, + z28 = svldnt1_mf8_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, 
mfloat8_t, + z0 = svldnt1_mf8_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_mf8_pn15: +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_mf8_0: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_mf8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_mf8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_mf8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_mf8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_mf8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_mf8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_mf8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c index ec31a68b46e..724ba852ef4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c @@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t, z4 = svread_hor_za8_u8_vg2 (0, 1), z4 = svread_hor_za8_u8_vg2 (0, 1)) +/* +** read_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t, + z4 = svread_hor_za8_mf8_vg2 (0, 1), + z4 = svread_hor_za8_mf8_vg2 (0, 1)) + /* ** read_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -50,6 +60,15 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t, z18 = svread_hor_za8_u8_vg2 (0, w15), z18 = svread_hor_za8_u8_vg2 (0, w15)) +/* +** read_za8_mf8_z18_0_w15: +** mova {z18\.b - z19\.b}, za0h\.b\[w15, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t, + z18 = svread_hor_za8_mf8_vg2 (0, w15), + z18 = svread_hor_za8_mf8_vg2 (0, w15)) + /* ** read_za8_s8_z23_0_w12p14: ** mova {[^\n]+}, za0h\.b\[w12, 14:15\] @@ -71,6 +90,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t, z4 = svread_hor_za8_u8_vg2 (0, w12 + 1), z4 = svread_hor_za8_u8_vg2 (0, w12 + 1)) +/* +** read_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1), + z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1)) + /* ** read_za8_s8_z28_0_w12p2: ** mova {z28\.b - z29\.b}, za0h\.b\[w12, 2:3\] @@ -90,6 +119,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t, z0 = svread_hor_za8_u8_vg2 (0, w15 + 3), z0 = svread_hor_za8_u8_vg2 (0, w15 + 3)) +/* +** read_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3), + z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3)) + /* ** read_za8_u8_z4_0_w15p12: ** mova {z4\.b - 
z5\.b}, za0h\.b\[w15, 12:13\] @@ -99,6 +138,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t, z4 = svread_hor_za8_u8_vg2 (0, w15 + 12), z4 = svread_hor_za8_u8_vg2 (0, w15 + 12)) +/* +** read_za8_mf8_z4_0_w15p12: +** mova {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12), + z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12)) + /* ** read_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -109,6 +157,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t, z28 = svread_hor_za8_u8_vg2 (0, w12 + 15), z28 = svread_hor_za8_u8_vg2 (0, w12 + 15)) +/* +** read_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** mova {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15), + z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15)) + /* ** read_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -129,6 +187,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t, z4 = svread_hor_za8_u8_vg2 (0, w12 - 1), z4 = svread_hor_za8_u8_vg2 (0, w12 - 1)) +/* +** read_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1), + z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1)) + /* ** read_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -138,3 +206,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t, z18 = svread_hor_za8_u8_vg2 (0, w16), z18 = svread_hor_za8_u8_vg2 (0, w16)) + +/* +** read_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** mova {z18\.b - z19\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t, + z18 = svread_hor_za8_mf8_vg2 (0, w16), + z18 = svread_hor_za8_mf8_vg2 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c index 261cbead442..2c3132dc6a8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c @@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t, z4 = svread_hor_za8_u8_vg4 (0, 1), z4 = svread_hor_za8_u8_vg4 (0, 1)) +/* +** read_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t, + z4 = svread_hor_za8_mf8_vg4 (0, 1), + z4 = svread_hor_za8_mf8_vg4 (0, 1)) + /* ** read_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,19 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t, z18 = svread_hor_za8_u8_vg4 (0, w15), z18 = svread_hor_za8_u8_vg4 (0, w15)) +/* +** read_za8_mf8_z18_0_w15: +** mova {[^\n]+}, za0h\.b\[w15, 0:3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t, + z18 = svread_hor_za8_mf8_vg4 (0, w15), + z18 = svread_hor_za8_mf8_vg4 (0, w15)) + /* ** read_za8_s8_z23_0_w12p12: ** mova {[^\n]+}, za0h\.b\[w12, 12:15\] @@ -77,6 +100,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t, z4 = svread_hor_za8_u8_vg4 (0, w12 + 1), z4 = svread_hor_za8_u8_vg4 (0, w12 + 1)) +/* +** read_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + z4 = svread_hor_za8_mf8_vg4 
(0, w12 + 1), + z4 = svread_hor_za8_mf8_vg4 (0, w12 + 1)) + /* ** read_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -97,6 +130,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t, z0 = svread_hor_za8_u8_vg4 (0, w15 + 3), z0 = svread_hor_za8_u8_vg4 (0, w15 + 3)) +/* +** read_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3), + z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3)) + /* ** read_za8_u8_z0_0_w12p4: ** mova {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\] @@ -106,6 +149,15 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t, z0 = svread_hor_za8_u8_vg4 (0, w12 + 4), z0 = svread_hor_za8_u8_vg4 (0, w12 + 4)) +/* +** read_za8_mf8_z0_0_w12p4: +** mova {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4), + z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4)) + /* ** read_za8_u8_z4_0_w15p12: ** mova {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\] @@ -115,6 +167,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t, z4 = svread_hor_za8_u8_vg4 (0, w15 + 12), z4 = svread_hor_za8_u8_vg4 (0, w15 + 12)) +/* +** read_za8_mf8_z4_0_w15p12: +** mova {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12), + z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12)) + /* ** read_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -125,6 +186,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t, z28 = svread_hor_za8_u8_vg4 (0, w12 + 14), z28 = svread_hor_za8_u8_vg4 (0, w12 + 14)) +/* +** read_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14), + z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14)) + /* ** read_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -145,6 +216,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t, z4 = svread_hor_za8_u8_vg4 (0, w12 - 1), z4 = svread_hor_za8_u8_vg4 (0, w12 - 1)) +/* +** read_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1), + z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1)) + /* ** read_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -154,3 +235,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t, z28 = svread_hor_za8_u8_vg4 (0, w16), z28 = svread_hor_za8_u8_vg4 (0, w16)) + +/* +** read_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t, + z28 = svread_hor_za8_mf8_vg4 (0, w16), + z28 = svread_hor_za8_mf8_vg4 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c index 55970616ba8..5cd101a4988 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c @@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t, z4 = svread_ver_za8_u8_vg2 (0, 1), z4 = svread_ver_za8_u8_vg2 (0, 1)) +/* +** read_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +**
ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t, + z4 = svread_ver_za8_mf8_vg2 (0, 1), + z4 = svread_ver_za8_mf8_vg2 (0, 1)) + /* ** read_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -50,6 +60,15 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t, z18 = svread_ver_za8_u8_vg2 (0, w15), z18 = svread_ver_za8_u8_vg2 (0, w15)) +/* +** read_za8_mf8_z18_0_w15: +** mova {z18\.b - z19\.b}, za0v\.b\[w15, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t, + z18 = svread_ver_za8_mf8_vg2 (0, w15), + z18 = svread_ver_za8_mf8_vg2 (0, w15)) + /* ** read_za8_s8_z23_0_w12p14: ** mova {[^\n]+}, za0v\.b\[w12, 14:15\] @@ -71,6 +90,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t, z4 = svread_ver_za8_u8_vg2 (0, w12 + 1), z4 = svread_ver_za8_u8_vg2 (0, w12 + 1)) +/* +** read_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1), + z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1)) + /* ** read_za8_s8_z28_0_w12p2: ** mova {z28\.b - z29\.b}, za0v\.b\[w12, 2:3\] @@ -90,6 +119,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t, z0 = svread_ver_za8_u8_vg2 (0, w15 + 3), z0 = svread_ver_za8_u8_vg2 (0, w15 + 3)) +/* +** read_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3), + z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3)) + /* ** read_za8_u8_z4_0_w15p12: ** mova {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\] @@ -99,6 +138,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t, z4 = svread_ver_za8_u8_vg2 (0, w15 + 12), z4 = svread_ver_za8_u8_vg2 (0, w15 + 12)) +/* +** read_za8_mf8_z4_0_w15p12: +** mova {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12), + z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12)) + /* ** read_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -109,6 +157,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t, z28 = svread_ver_za8_u8_vg2 (0, w12 + 15), z28 = svread_ver_za8_u8_vg2 (0, w12 + 15)) +/* +** read_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** mova {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15), + z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15)) + /* ** read_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -129,6 +187,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t, z4 = svread_ver_za8_u8_vg2 (0, w12 - 1), z4 = svread_ver_za8_u8_vg2 (0, w12 - 1)) +/* +** read_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1), + z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1)) + /* ** read_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -138,3 +206,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t, z18 = svread_ver_za8_u8_vg2 (0, w16), z18 = svread_ver_za8_u8_vg2 (0, w16)) + +/* +** read_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** mova {z18\.b - z19\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t, + z18 = svread_ver_za8_mf8_vg2 (0, w16), + z18 = svread_ver_za8_mf8_vg2 (0, w16)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c index 6fd8a976d4f..daae8bc7285 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c @@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t, z4 = svread_ver_za8_u8_vg4 (0, 1), z4 = svread_ver_za8_u8_vg4 (0, 1)) +/* +** read_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t, + z4 = svread_ver_za8_mf8_vg4 (0, 1), + z4 = svread_ver_za8_mf8_vg4 (0, 1)) + /* ** read_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,19 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t, z18 = svread_ver_za8_u8_vg4 (0, w15), z18 = svread_ver_za8_u8_vg4 (0, w15)) +/* +** read_za8_mf8_z18_0_w15: +** mova {[^\n]+}, za0v\.b\[w15, 0:3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t, + z18 = svread_ver_za8_mf8_vg4 (0, w15), + z18 = svread_ver_za8_mf8_vg4 (0, w15)) + /* ** read_za8_s8_z23_0_w12p12: ** mova {[^\n]+}, za0v\.b\[w12, 12:15\] @@ -77,6 +100,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t, z4 = svread_ver_za8_u8_vg4 (0, w12 + 1), z4 = svread_ver_za8_u8_vg4 (0, w12 + 1)) +/* +** read_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1), + z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1)) + /* ** read_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -97,6 +130,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t, z0 = svread_ver_za8_u8_vg4 (0, w15 + 3), z0 = svread_ver_za8_u8_vg4 (0, w15 + 3)) +/* +** read_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3), + z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3)) + /* ** read_za8_u8_z0_0_w12p4: ** mova {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\] @@ -106,6 +149,15 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t, z0 = svread_ver_za8_u8_vg4 (0, w12 + 4), z0 = svread_ver_za8_u8_vg4 (0, w12 + 4)) +/* +** read_za8_mf8_z0_0_w12p4: +** mova {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4), + z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4)) + /* ** read_za8_u8_z4_0_w15p12: ** mova {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\] @@ -115,6 +167,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t, z4 = svread_ver_za8_u8_vg4 (0, w15 + 12), z4 = svread_ver_za8_u8_vg4 (0, w15 + 12)) +/* +** read_za8_mf8_z4_0_w15p12: +** mova {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12), + z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12)) + /* ** read_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -125,6 +186,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t, z28 = svread_ver_za8_u8_vg4 (0, w12 + 14), z28 = svread_ver_za8_u8_vg4 (0, w12 + 14)) +/* +** read_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + z28 = svread_ver_za8_mf8_vg4 (0, w12 
+ 14), + z28 = svread_ver_za8_mf8_vg4 (0, w12 + 14)) + /* ** read_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -145,6 +216,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t, z4 = svread_ver_za8_u8_vg4 (0, w12 - 1), z4 = svread_ver_za8_u8_vg4 (0, w12 - 1)) +/* +** read_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1), + z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1)) + /* ** read_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -154,3 +235,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t, z28 = svread_ver_za8_u8_vg4 (0, w16), z28 = svread_ver_za8_u8_vg4 (0, w16)) + +/* +** read_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t, + z28 = svread_ver_za8_mf8_vg4 (0, w16), + z28 = svread_ver_za8_mf8_vg4 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c index 9b151abf4fa..819bf786a4f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c @@ -32,6 +32,16 @@ TEST_READ_ZA_XN (read_w7_z0, svuint8x2_t, z0 = svread_za8_u8_vg1x2 (w7), z0 = svread_za8_u8_vg1x2 (w7)) +/* +** read_mf8_w7_z0: +** mov (w8|w9|w10|w11), w7 +** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w7_z0, svmfloat8x2_t, + z0 = svread_za8_mf8_vg1x2 (w7), + z0 = svread_za8_mf8_vg1x2 (w7)) + /* ** read_w8_z0: ** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\] @@ -61,6 +71,16 @@ TEST_READ_ZA_XN (read_w12_z0, svuint8x2_t, z0 = svread_za8_u8_vg1x2 (w12), z0 = svread_za8_u8_vg1x2 (w12)) +/* +** read_mf8_w12_z0: +** mov (w8|w9|w10|w11), w12 +** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w12_z0, svmfloat8x2_t, + z0 = svread_za8_mf8_vg1x2 (w12), + z0 = svread_za8_mf8_vg1x2 (w12)) + /* ** read_w8p7_z0: ** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\] @@ -90,6 +110,16 @@ TEST_READ_ZA_XN (read_w8m1_z0, svuint8x2_t, z0 = svread_za8_u8_vg1x2 (w8 - 1), z0 = svread_za8_u8_vg1x2 (w8 - 1)) +/* +** read_mf8_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8m1_z0, svmfloat8x2_t, + z0 = svread_za8_mf8_vg1x2 (w8 - 1), + z0 = svread_za8_mf8_vg1x2 (w8 - 1)) + /* ** read_w8_z18: ** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\] @@ -99,6 +129,15 @@ TEST_READ_ZA_XN (read_w8_z18, svuint8x2_t, z18 = svread_za8_u8_vg1x2 (w8), z18 = svread_za8_u8_vg1x2 (w8)) +/* +** read_mf8_w8_z18: +** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x2_t, + z18 = svread_za8_mf8_vg1x2 (w8), + z18 = svread_za8_mf8_vg1x2 (w8)) + /* Leave the assembler to check for correctness for misaligned registers. 
*/ /* @@ -120,3 +159,12 @@ TEST_READ_ZA_XN (read_w8_z23, svint8x2_t, TEST_READ_ZA_XN (read_w8_z28, svuint8x2_t, z28 = svread_za8_u8_vg1x2 (w8), z28 = svread_za8_u8_vg1x2 (w8)) + +/* +** read_mf8_w8_z28: +** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8_z28, svmfloat8x2_t, + z28 = svread_za8_mf8_vg1x2 (w8), + z28 = svread_za8_mf8_vg1x2 (w8)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c index 80c81dde097..f8c6d2a3d43 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c @@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_w0_z0, svuint8x4_t, z0 = svread_za8_u8_vg1x4 (w0), z0 = svread_za8_u8_vg1x4 (w0)) +/* +** read_mf8_w0_z0: +** mov (w8|w9|w10|w11), w0 +** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w0_z0, svmfloat8x4_t, + z0 = svread_za8_mf8_vg1x4 (w0), + z0 = svread_za8_mf8_vg1x4 (w0)) + /* ** read_w7_z0: ** mov (w8|w9|w10|w11), w7 @@ -50,6 +60,14 @@ TEST_READ_ZA_XN (read_w11_z0, svuint8x4_t, z0 = svread_za8_u8_vg1x4 (w11), z0 = svread_za8_u8_vg1x4 (w11)) +/* +** read_mf8_w11_z0: +** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w11_z0, svmfloat8x4_t, + z0 = svread_za8_mf8_vg1x4 (w11), + z0 = svread_za8_mf8_vg1x4 (w11)) /* ** read_w12_z0: @@ -80,6 +98,16 @@ TEST_READ_ZA_XN (read_w8p8_z0, svuint8x4_t, z0 = svread_za8_u8_vg1x4 (w8 + 8), z0 = svread_za8_u8_vg1x4 (w8 + 8)) +/* +** read_mf8_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8p8_z0, svmfloat8x4_t, + z0 = svread_za8_mf8_vg1x4 (w8 + 8), + z0 = svread_za8_mf8_vg1x4 (w8 + 8)) + /* ** read_w8m1_z0: ** sub (w8|w9|w10|w11), w8, #?1 @@ -114,6 +142,19 @@ TEST_READ_ZA_XN (read_w8_z18, svuint8x4_t, z18 = svread_za8_u8_vg1x4 (w8), z18 = svread_za8_u8_vg1x4 (w8)) +/* +** read_mf8_w8_z18: +** mova [^\n]+, za\.d\[w8, 0, vgx4\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x4_t, + z18 = svread_za8_mf8_vg1x4 (w8), + z18 = svread_za8_mf8_vg1x4 (w8)) + /* ** read_w8_z23: ** mova [^\n]+, za\.d\[w8, 0, vgx4\] @@ -127,6 +168,19 @@ TEST_READ_ZA_XN (read_w8_z23, svuint8x4_t, z23 = svread_za8_u8_vg1x4 (w8), z23 = svread_za8_u8_vg1x4 (w8)) +/* +** read_mf8_w8_z23: +** mova [^\n]+, za\.d\[w8, 0, vgx4\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (read_mf8_w8_z23, svmfloat8x4_t, + z23 = svread_za8_mf8_vg1x4 (w8), + z23 = svread_za8_mf8_vg1x4 (w8)) + /* ** read_w8_z28: ** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\] diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c index 8b6644f1d6e..aa29879331e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c @@ -86,6 +86,16 @@ TEST_READ_ZA (readz_za128_u8_0_w0, svuint8_t, z0 = svreadz_hor_za128_u8 (0, w0), z0 = svreadz_hor_za128_u8 (0, w0)) +/* +** readz_za128_mf8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0h\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t, + z0 = svreadz_hor_za128_mf8 (0, w0), + z0 = svreadz_hor_za128_mf8 (0, w0)) + /* ** readz_za128_s16_0_w0: ** mov (w1[2-5]), w0 diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c index 6fea16459e2..f6f595f5697 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c @@ -85,3 +85,13 @@ TEST_READ_ZA (readz_za8_s8_0_w0m1, svint8_t, TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t, z0 = svreadz_hor_za8_u8 (0, w0), z0 = svreadz_hor_za8_u8 (0, w0)) + +/* +** readz_za8_mf8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.b, za0h\.b\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t, + z0 = svreadz_hor_za8_mf8 (0, w0), + z0 = svreadz_hor_za8_mf8 (0, w0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c index a1a63104ad4..d09687e3674 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c @@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x2_t, z4 = svreadz_hor_za8_u8_vg2 (0, 1), z4 = svreadz_hor_za8_u8_vg2 (0, 1)) +/* +** readz_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t, + z4 = svreadz_hor_za8_mf8_vg2 (0, 1), + z4 = svreadz_hor_za8_mf8_vg2 (0, 1)) + /* ** readz_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x2_t, z18 = svreadz_hor_za8_u8_vg2 (0, w15), z18 = svreadz_hor_za8_u8_vg2 (0, w15)) +/* +** readz_za8_mf8_z18_0_w15: +** movaz {z18\.b - z19\.b}, za0h\.b\[w15, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t, + z18 = svreadz_hor_za8_mf8_vg2 (0, w15), + z18 = svreadz_hor_za8_mf8_vg2 (0, w15)) + /* ** readz_za8_s8_z23_0_w12p14: ** movaz {[^\n]+}, za0h\.b\[w12, 14:15\] @@ -75,6 +94,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x2_t, z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1), z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1)) +/* +** readz_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1), + z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1)) + /* ** readz_za8_s8_z28_0_w12p2: ** movaz {z28\.b - z29\.b}, za0h\.b\[w12, 2:3\] @@ -94,6 +123,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x2_t, z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3), z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3)) +/* +** readz_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** movaz {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3), + z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3)) + /* ** readz_za8_u8_z4_0_w15p12: ** movaz {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\] @@ -103,6 +142,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x2_t, z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12), z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12)) +/* +** readz_za8_mf8_z4_0_w15p12: +** movaz {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12), + z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12)) + /* ** readz_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -113,6 +161,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p15, svuint8x2_t, z28 = svreadz_hor_za8_u8_vg2 (0, w12 + 15), z28 = 
svreadz_hor_za8_u8_vg2 (0, w12 + 15)) +/* +** readz_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** movaz {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15), + z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15)) + /* ** readz_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -133,6 +191,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t, z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1), z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1)) +/* +** readz_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1), + z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1)) + /* ** readz_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -142,3 +210,13 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t, z18 = svreadz_hor_za8_u8_vg2 (0, w16), z18 = svreadz_hor_za8_u8_vg2 (0, w16)) + +/* +** readz_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** movaz {z18\.b - z19\.b}, za0h\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t, + z18 = svreadz_hor_za8_mf8_vg2 (0, w16), + z18 = svreadz_hor_za8_mf8_vg2 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c index ca71bc513e3..eec47bf3152 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c @@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x4_t, z4 = svreadz_hor_za8_u8_vg4 (0, 1), z4 = svreadz_hor_za8_u8_vg4 (0, 1)) +/* +** readz_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t, + z4 = svreadz_hor_za8_mf8_vg4 (0, 1), + z4 = svreadz_hor_za8_mf8_vg4 (0, 1)) + /* ** readz_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -58,6 +68,19 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x4_t, z18 = svreadz_hor_za8_u8_vg4 (0, w15), z18 = svreadz_hor_za8_u8_vg4 (0, w15)) +/* +** readz_za8_mf8_z18_0_w15: +** movaz {[^\n]+}, za0h\.b\[w15, 0:3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t, + z18 = svreadz_hor_za8_mf8_vg4 (0, w15), + z18 = svreadz_hor_za8_mf8_vg4 (0, w15)) + /* ** readz_za8_s8_z23_0_w12p12: ** movaz {[^\n]+}, za0h\.b\[w12, 12:15\] @@ -81,6 +104,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x4_t, z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1), z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1)) +/* +** readz_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1), + z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1)) + /* ** readz_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -101,6 +134,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x4_t, z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3), z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3)) +/* +** readz_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** movaz {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3), + z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3)) + /* ** 
readz_za8_u8_z0_0_w12p4: ** movaz {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\] @@ -110,6 +153,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w12p4, svuint8x4_t, z0 = svreadz_hor_za8_u8_vg4 (0, w12 + 4), z0 = svreadz_hor_za8_u8_vg4 (0, w12 + 4)) +/* +** readz_za8_mf8_z0_0_w12p4: +** movaz {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4), + z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4)) + /* ** readz_za8_u8_z4_0_w15p12: ** movaz {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\] @@ -119,6 +171,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x4_t, z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12), z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12)) +/* +** readz_za8_mf8_z4_0_w15p12: +** movaz {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12), + z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12)) + /* ** readz_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -129,6 +190,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p14, svuint8x4_t, z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14), z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14)) +/* +** readz_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** movaz {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14), + z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14)) + /* ** readz_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -149,6 +220,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t, z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1), z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1)) +/* +** readz_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1), + z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1)) + /* ** readz_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -158,3 +239,13 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t, z28 = svreadz_hor_za8_u8_vg4 (0, w16), z28 = svreadz_hor_za8_u8_vg4 (0, w16)) + +/* +** readz_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** movaz {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t, + z28 = svreadz_hor_za8_mf8_vg4 (0, w16), + z28 = svreadz_hor_za8_mf8_vg4 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c new file mode 100644 index 00000000000..401543cbbcd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c @@ -0,0 +1,197 @@ +/* { dg-do assemble { target aarch64_asm_sme2p1_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme2p1" + +/* +** readz_za128_s8_0_0: +** mov (w1[2-5]), (?:wzr|#?0) +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_0_0, svint8_t, + z0 = svreadz_ver_za128_s8 (0, 0), + z0 = svreadz_ver_za128_s8 (0, 0)) + +/* +** readz_za128_s8_0_1: +** mov (w1[2-5]), #?1 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_0_1, svint8_t, + z0 = svreadz_ver_za128_s8 (0, 1), + z0 = svreadz_ver_za128_s8 (0, 1)) + +/* +** readz_za128_s8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_0_w0, svint8_t, + z0 = svreadz_ver_za128_s8 (0, w0), + z0 = svreadz_ver_za128_s8 (0, w0)) + +/* +** readz_za128_s8_0_w0p1: +** add (w1[2-5]), w0, #?1 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_0_w0p1, svint8_t, + z0 = svreadz_ver_za128_s8 (0, w0 + 1), + z0 = svreadz_ver_za128_s8 (0, w0 + 1)) + +/* +** readz_za128_s8_0_w0m1: +** sub (w1[2-5]), w0, #?1 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_0_w0m1, svint8_t, + z0 = svreadz_ver_za128_s8 (0, w0 - 1), + z0 = svreadz_ver_za128_s8 (0, w0 - 1)) + +/* +** readz_za128_s8_1_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za1v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_1_w0, svint8_t, + z0 = svreadz_ver_za128_s8 (1, w0), + z0 = svreadz_ver_za128_s8 (1, w0)) + +/* +** readz_za128_s8_15_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za15v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s8_15_w0, svint8_t, + z0 = svreadz_ver_za128_s8 (15, w0), + z0 = svreadz_ver_za128_s8 (15, w0)) + +/* +** readz_za128_u8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_u8_0_w0, svuint8_t, + z0 = svreadz_ver_za128_u8 (0, w0), + z0 = svreadz_ver_za128_u8 (0, w0)) + +/* +** readz_za128_mf8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t, + z0 = svreadz_ver_za128_mf8 (0, w0), + z0 = svreadz_ver_za128_mf8 (0, w0)) + +/* +** readz_za128_s16_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s16_0_w0, svint16_t, + z0 = svreadz_ver_za128_s16 (0, w0), + z0 = svreadz_ver_za128_s16 (0, w0)) + +/* +** readz_za128_u16_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_u16_0_w0, svuint16_t, + z0 = svreadz_ver_za128_u16 (0, w0), + z0 = svreadz_ver_za128_u16 (0, w0)) + +/* +** readz_za128_f16_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_f16_0_w0, svfloat16_t, + z0 = svreadz_ver_za128_f16 (0, w0), + z0 = svreadz_ver_za128_f16 (0, w0)) + +/* +** readz_za128_bf16_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_bf16_0_w0, svbfloat16_t, + z0 = svreadz_ver_za128_bf16 (0, w0), + z0 = svreadz_ver_za128_bf16 (0, w0)) + +/* +** readz_za128_s32_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s32_0_w0, svint32_t, + z0 = svreadz_ver_za128_s32 (0, w0), + z0 = svreadz_ver_za128_s32 (0, w0)) + +/* +** readz_za128_u32_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_u32_0_w0, svuint32_t, + z0 = svreadz_ver_za128_u32 (0, w0), + z0 = svreadz_ver_za128_u32 (0, w0)) + +/* +** 
readz_za128_f32_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_f32_0_w0, svfloat32_t, + z0 = svreadz_ver_za128_f32 (0, w0), + z0 = svreadz_ver_za128_f32 (0, w0)) + +/* +** readz_za128_s64_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_s64_0_w0, svint64_t, + z0 = svreadz_ver_za128_s64 (0, w0), + z0 = svreadz_ver_za128_s64 (0, w0)) + +/* +** readz_za128_u64_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_u64_0_w0, svuint64_t, + z0 = svreadz_ver_za128_u64 (0, w0), + z0 = svreadz_ver_za128_u64 (0, w0)) + +/* +** readz_za128_f64_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.q, za0v\.q\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za128_f64_0_w0, svfloat64_t, + z0 = svreadz_ver_za128_f64 (0, w0), + z0 = svreadz_ver_za128_f64 (0, w0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c index 4bd5ae783ef..66c42cecd31 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c @@ -85,3 +85,13 @@ TEST_READ_ZA (readz_za8_s8_0_w0m1, svint8_t, TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t, z0 = svreadz_ver_za8_u8 (0, w0), z0 = svreadz_ver_za8_u8 (0, w0)) + +/* +** readz_za8_mf8_0_w0: +** mov (w1[2-5]), w0 +** movaz z0\.b, za0v\.b\[\1, 0\] +** ret +*/ +TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t, + z0 = svreadz_ver_za8_mf8 (0, w0), + z0 = svreadz_ver_za8_mf8 (0, w0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c index 940a5619a13..daa6b131587 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c @@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x2_t, z4 = svreadz_ver_za8_u8_vg2 (0, 1), z4 = svreadz_ver_za8_u8_vg2 (0, 1)) +/* +** readz_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t, + z4 = svreadz_ver_za8_mf8_vg2 (0, 1), + z4 = svreadz_ver_za8_mf8_vg2 (0, 1)) + /* ** readz_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x2_t, z18 = svreadz_ver_za8_u8_vg2 (0, w15), z18 = svreadz_ver_za8_u8_vg2 (0, w15)) +/* +** readz_za8_mf8_z18_0_w15: +** movaz {z18\.b - z19\.b}, za0v\.b\[w15, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t, + z18 = svreadz_ver_za8_mf8_vg2 (0, w15), + z18 = svreadz_ver_za8_mf8_vg2 (0, w15)) + /* ** readz_za8_s8_z23_0_w12p14: ** movaz {[^\n]+}, za0v\.b\[w12, 14:15\] @@ -75,6 +94,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x2_t, z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1), z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1)) +/* +** readz_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1), + z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1)) + /* ** readz_za8_s8_z28_0_w12p2: ** movaz {z28\.b - z29\.b}, za0v\.b\[w12, 2:3\] @@ -94,6 +123,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x2_t, z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3), z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3)) +/* +** readz_za8_mf8_z0_0_w15p3: +** add 
(w[0-9]+), w15, #?3 +** movaz {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3), + z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3)) + /* ** readz_za8_u8_z4_0_w15p12: ** movaz {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\] @@ -103,6 +142,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x2_t, z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12), z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12)) +/* +** readz_za8_mf8_z4_0_w15p12: +** movaz {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12), + z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12)) + /* ** readz_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -113,6 +161,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p15, svuint8x2_t, z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15), z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15)) +/* +** readz_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** movaz {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15), + z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15)) + /* ** readz_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -133,6 +191,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t, z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1), z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1)) +/* +** readz_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1), + z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1)) + /* ** readz_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -142,3 +210,12 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t, z18 = svreadz_ver_za8_u8_vg2 (0, w16), z18 = svreadz_ver_za8_u8_vg2 (0, w16)) +/* +** readz_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** movaz {z18\.b - z19\.b}, za0v\.b\[\1, 0:1\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t, + z18 = svreadz_ver_za8_mf8_vg2 (0, w16), + z18 = svreadz_ver_za8_mf8_vg2 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c index 9f776ded80f..f3c06d8f029 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c @@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x4_t, z4 = svreadz_ver_za8_u8_vg4 (0, 1), z4 = svreadz_ver_za8_u8_vg4 (0, 1)) +/* +** readz_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t, + z4 = svreadz_ver_za8_mf8_vg4 (0, 1), + z4 = svreadz_ver_za8_mf8_vg4 (0, 1)) + /* ** readz_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -58,6 +68,19 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x4_t, z18 = svreadz_ver_za8_u8_vg4 (0, w15), z18 = svreadz_ver_za8_u8_vg4 (0, w15)) +/* +** readz_za8_mf8_z18_0_w15: +** movaz {[^\n]+}, za0v\.b\[w15, 0:3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t, + z18 = svreadz_ver_za8_mf8_vg4 (0, w15), + z18 = svreadz_ver_za8_mf8_vg4 (0, w15)) + /* ** readz_za8_s8_z23_0_w12p12: ** movaz {[^\n]+}, za0v\.b\[w12, 12:15\] @@ -81,6 +104,16 @@ 
TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x4_t, z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1), z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1)) +/* +** readz_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1), + z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1)) + /* ** readz_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -101,6 +134,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x4_t, z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3), z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3)) +/* +** readz_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** movaz {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3), + z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3)) + /* ** readz_za8_u8_z0_0_w12p4: ** movaz {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\] @@ -110,6 +153,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w12p4, svuint8x4_t, z0 = svreadz_ver_za8_u8_vg4 (0, w12 + 4), z0 = svreadz_ver_za8_u8_vg4 (0, w12 + 4)) +/* +** readz_za8_mf8_z0_0_w12p4: +** movaz {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4), + z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4)) + /* ** readz_za8_u8_z4_0_w15p12: ** movaz {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\] @@ -119,6 +171,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x4_t, z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12), z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12)) +/* +** readz_za8_mf8_z4_0_w15p12: +** movaz {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12), + z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12)) + /* ** readz_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -129,6 +190,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p14, svuint8x4_t, z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14), z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14)) +/* +** readz_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** movaz {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14), + z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14)) + /* ** readz_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -149,6 +220,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t, z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1), z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1)) +/* +** readz_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1), + z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1)) + /* ** readz_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -158,3 +239,12 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t, z28 = svreadz_ver_za8_u8_vg4 (0, w16), z28 = svreadz_ver_za8_u8_vg4 (0, w16)) +/* +** readz_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** movaz {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\] +** ret +*/ +TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t, + z28 = svreadz_ver_za8_mf8_vg4 (0, w16), + z28 = svreadz_ver_za8_mf8_vg4 (0, w16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c index 
7bdb17d7e79..f4d40315acd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c @@ -36,6 +36,16 @@ TEST_READ_ZA_XN (readz_w7_z0, svuint8x2_t, z0 = svreadz_za8_u8_vg1x2 (w7), z0 = svreadz_za8_u8_vg1x2 (w7)) +/* +** readz_mf8_w7_z0: +** mov (w8|w9|w10|w11), w7 +** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w7_z0, svmfloat8x2_t, + z0 = svreadz_za8_mf8_vg1x2 (w7), + z0 = svreadz_za8_mf8_vg1x2 (w7)) + /* ** readz_w8_z0: ** movaz {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\] @@ -65,6 +75,16 @@ TEST_READ_ZA_XN (readz_w12_z0, svuint8x2_t, z0 = svreadz_za8_u8_vg1x2 (w12), z0 = svreadz_za8_u8_vg1x2 (w12)) +/* +** readz_mf8_w12_z0: +** mov (w8|w9|w10|w11), w12 +** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w12_z0, svmfloat8x2_t, + z0 = svreadz_za8_mf8_vg1x2 (w12), + z0 = svreadz_za8_mf8_vg1x2 (w12)) + /* ** readz_w8p7_z0: ** movaz {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\] @@ -94,6 +114,16 @@ TEST_READ_ZA_XN (readz_w8m1_z0, svuint8x2_t, z0 = svreadz_za8_u8_vg1x2 (w8 - 1), z0 = svreadz_za8_u8_vg1x2 (w8 - 1)) +/* +** readz_mf8_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8m1_z0, svmfloat8x2_t, + z0 = svreadz_za8_mf8_vg1x2 (w8 - 1), + z0 = svreadz_za8_mf8_vg1x2 (w8 - 1)) + /* ** readz_w8_z18: ** movaz {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\] @@ -103,6 +133,15 @@ TEST_READ_ZA_XN (readz_w8_z18, svuint8x2_t, z18 = svreadz_za8_u8_vg1x2 (w8), z18 = svreadz_za8_u8_vg1x2 (w8)) +/* +** readz_mf8_w8_z18: +** movaz {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x2_t, + z18 = svreadz_za8_mf8_vg1x2 (w8), + z18 = svreadz_za8_mf8_vg1x2 (w8)) + /* Leave the assembler to check for correctness for misaligned registers. 
*/ /* @@ -124,3 +163,12 @@ TEST_READ_ZA_XN (readz_w8_z23, svint8x2_t, TEST_READ_ZA_XN (readz_w8_z28, svuint8x2_t, z28 = svreadz_za8_u8_vg1x2 (w8), z28 = svreadz_za8_u8_vg1x2 (w8)) + +/* +** readz_mf8_w8_z28: +** movaz {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8_z28, svmfloat8x2_t, + z28 = svreadz_za8_mf8_vg1x2 (w8), + z28 = svreadz_za8_mf8_vg1x2 (w8)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c index 02beaae85c6..d9be244c62c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c @@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_w0_z0, svuint8x4_t, z0 = svreadz_za8_u8_vg1x4 (w0), z0 = svreadz_za8_u8_vg1x4 (w0)) +/* +** readz_mf8_w0_z0: +** mov (w8|w9|w10|w11), w0 +** movaz {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w0_z0, svmfloat8x4_t, + z0 = svreadz_za8_mf8_vg1x4 (w0), + z0 = svreadz_za8_mf8_vg1x4 (w0)) + /* ** readz_w7_z0: ** mov (w8|w9|w10|w11), w7 @@ -55,6 +65,16 @@ TEST_READ_ZA_XN (readz_w11_z0, svuint8x4_t, z0 = svreadz_za8_u8_vg1x4 (w11), z0 = svreadz_za8_u8_vg1x4 (w11)) +/* +** readz_mf8_w11_z0: +** movaz {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w11_z0, svmfloat8x4_t, + z0 = svreadz_za8_mf8_vg1x4 (w11), + z0 = svreadz_za8_mf8_vg1x4 (w11)) + + /* ** readz_w12_z0: ** mov (w8|w9|w10|w11), w12 @@ -84,6 +104,16 @@ TEST_READ_ZA_XN (readz_w8p8_z0, svuint8x4_t, z0 = svreadz_za8_u8_vg1x4 (w8 + 8), z0 = svreadz_za8_u8_vg1x4 (w8 + 8)) +/* +** readz_mf8_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** movaz {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\] +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8p8_z0, svmfloat8x4_t, + z0 = svreadz_za8_mf8_vg1x4 (w8 + 8), + z0 = svreadz_za8_mf8_vg1x4 (w8 + 8)) + /* ** readz_w8m1_z0: ** sub (w8|w9|w10|w11), w8, #?1 @@ -118,6 +148,19 @@ TEST_READ_ZA_XN (readz_w8_z18, svuint8x4_t, z18 = svreadz_za8_u8_vg1x4 (w8), z18 = svreadz_za8_u8_vg1x4 (w8)) +/* +** readz_mf8_w8_z18: +** movaz [^\n]+, za\.d\[w8, 0, vgx4\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x4_t, + z18 = svreadz_za8_mf8_vg1x4 (w8), + z18 = svreadz_za8_mf8_vg1x4 (w8)) + /* ** readz_w8_z23: ** movaz [^\n]+, za\.d\[w8, 0, vgx4\] @@ -131,6 +174,19 @@ TEST_READ_ZA_XN (readz_w8_z23, svuint8x4_t, z23 = svreadz_za8_u8_vg1x4 (w8), z23 = svreadz_za8_u8_vg1x4 (w8)) +/* +** readz_mf8_w8_z23: +** movaz [^\n]+, za\.d\[w8, 0, vgx4\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_READ_ZA_XN (readz_mf8_w8_z23, svmfloat8x4_t, + z23 = svreadz_za8_mf8_vg1x4 (w8), + z23 = svreadz_za8_mf8_vg1x4 (w8)) + /* ** readz_w8_z28: ** movaz {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\] diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c new file mode 100644 index 00000000000..1192aa84dc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x2_t, z0, + svsel_mf8_x2 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.b - z1\.b}, pn\1, {z0\.b - 
z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x2_t, z0, + svsel_mf8_x2 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x2_t, z0, + svsel_mf8_x2 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** sel {z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x2_t, z4, + svsel_mf8_x2 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x2_t, z18, + svsel_mf8_x2 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** sel {z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x2_t, z18, + svsel_mf8_x2 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** sel [^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x2_t, z23, + svsel_mf8_x2 (pn15, z0, z18), + svsel (pn15, z0, z18)) + +/* +** sel_z0_pn15_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b} +** ret +*/ +TEST_XN (sel_z0_pn15_z23_z28, svmfloat8x2_t, z0, + svsel_mf8_x2 (pn15, z23, z28), + svsel (pn15, z23, z28)) + +/* +** sel_z0_pn8_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+} +** ret +*/ +TEST_XN (sel_z0_pn8_z28_z23, svmfloat8x2_t, z0, + svsel_mf8_x2 (pn8, z28, z23), + svsel (pn8, z28, z23)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c new file mode 100644 index 00000000000..ddcba0318d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x4_t, z0, + svsel_mf8_x4 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x4_t, z0, + svsel_mf8_x4 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x4_t, z0, + svsel_mf8_x4 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x4_t, z4, + svsel_mf8_x4 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x4_t, z18, + svsel_mf8_x4 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x4_t, z18, + 
svsel_mf8_x4 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel [^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x4_t, z23, + svsel_mf8_x4 (pn15, z0, z18), + svsel (pn15, z0, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c new file mode 100644 index 00000000000..c778c139e8e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_mf8_base: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_base, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_mf8_index: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_index, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_1, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* +** st1_mf8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_2, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** st1_mf8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_14, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 + svcntb () * 14, z0), + svst1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_16, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 + svcntb () * 16, z0), + svst1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_mf8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* +** st1_mf8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** st1_mf8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m16, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 - svcntb () * 16, z0), + svst1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** st1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m18, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0 - svcntb () * 18, z0), + svst1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** st1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_mf8_z22: +** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_mf8_z28: +** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_mf8_pn15: +** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + svst1_mf8_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_mf8_0: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_mf8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_mf8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_mf8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_mf8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_mf8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + svst1_vnum_mf8_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c new file mode 100644 index 00000000000..5f60757f07b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_mf8_base: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_base, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_mf8_index: +** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_index, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_1, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_2, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_mf8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_3, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb () * 3, z0), + svst1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** st1_mf8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_4, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb () * 4, z0), + svst1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** st1_mf8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_28, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb () * 28, z0), + svst1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** st1_mf8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_32, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 + svcntb () * 32, z0), + svst1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_mf8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m3, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb () * 3, z0), + svst1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** st1_mf8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m4, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb () * 4, z0), + svst1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** st1_mf8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m32, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb () * 32, z0), + svst1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** st1_mf8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_m36, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0 - svcntb () * 36, z0), + svst1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** st1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_mf8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_mf8_z28: +** st1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + 
svst1_mf8_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_mf8_pn15: +** st1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + svst1_mf8_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_mf8_0: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_mf8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_mf8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_mf8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_mf8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_mf8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_mf8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_mf8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_mf8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + svst1_vnum_mf8_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c new file mode 100644 index 00000000000..f9a90fbe9b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_mf8_base: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_mf8_index: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* +** stnt1_mf8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** stnt1_mf8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0), + svstnt1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0), + svstnt1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_mf8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* +** stnt1_mf8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** stnt1_mf8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0), + svstnt1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** stnt1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0), + svstnt1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** stnt1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_mf8_z22: +** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_mf8_z28: +** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_mf8_pn15: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_mf8_0: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_mf8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_mf8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_mf8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_mf8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_mf8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c new file mode 100644 index 00000000000..a204f796982 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_mf8_base: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_mf8_index: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_mf8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0), + svstnt1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** stnt1_mf8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0), + svstnt1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** stnt1_mf8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0), + svstnt1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** stnt1_mf8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0), + svstnt1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0), + svstnt1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** stnt1_mf8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0), + svstnt1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** stnt1_mf8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0), + svstnt1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** stnt1_mf8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0), + svstnt1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** stnt1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_mf8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_mf8_z28: +** stnt1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_mf8_pn7: 
+** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_mf8_pn15: +** stnt1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_mf8_0: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_mf8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_mf8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_mf8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_mf8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_mf8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_mf8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_mf8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c new file mode 100644 index 00000000000..f107b4c7a18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z0_z0, svmfloat8x2_t, z0, + svuzp_mf8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (uzp_z0_z4, svmfloat8x2_t, z0, + svuzp_mf8_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z4_z18, svmfloat8x2_t, z4, + svuzp_mf8_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.b - z19\.b}, z23\.b, z24\.b +** ret +*/ +TEST_XN (uzp_z18_z23, svmfloat8x2_t, z18, + svuzp_mf8_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svmfloat8x2_t, z23, + svuzp_mf8_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z28_z0, svmfloat8x2_t, z28, + svuzp_mf8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: { xfail aarch64_big_endian } +** uzp {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svmfloat8x2_t, z28, + svuzp_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svmfloat8x2_t, z28, + svuzp_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c new file mode 100644 index 00000000000..bbaf26c85a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* 
+** uzp_z0_z0: +** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z0_z0, svmfloat8x4_t, z0, + svuzp_mf8_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (uzp_z0_z4, svmfloat8x4_t, z0, + svuzp_mf8_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svmfloat8x4_t, z4, + svuzp_mf8_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svmfloat8x4_t, z18, + svuzp_mf8_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svmfloat8x4_t, z23, + svuzp_mf8_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z28_z0, svmfloat8x4_t, z28, + svuzp_mf8_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c new file mode 100644 index 00000000000..cef514c46e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svmfloat8x2_t, z0, + svuzpq_mf8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svmfloat8x2_t, z0, + svuzpq_mf8_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svmfloat8x2_t, z4, + svuzpq_mf8_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svmfloat8x2_t, z18, + svuzpq_mf8_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svmfloat8x2_t, z23, + svuzpq_mf8_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svmfloat8x2_t, z28, + svuzpq_mf8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svmfloat8x2_t, z28, + svuzpq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svmfloat8x2_t, z28, + svuzpq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c new file mode 100644 index 00000000000..6b348c95f83 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svmfloat8x4_t, z0, + svuzpq_mf8_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp 
{z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svmfloat8x4_t, z0, + svuzpq_mf8_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svmfloat8x4_t, z4, + svuzpq_mf8_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svmfloat8x4_t, z18, + svuzpq_mf8_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svmfloat8x4_t, z23, + svuzpq_mf8_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svmfloat8x4_t, z28, + svuzpq_mf8_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c index a2af846b60b..8df504cb423 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c @@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, 1, z4), svwrite_hor_za8_u8_vg2 (0, 1, z4)) +/* +** write_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, 1, z4), + svwrite_hor_za8_mf8_vg2 (0, 1, z4)) + /* ** write_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -50,6 +60,15 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w15, z18), svwrite_hor_za8_u8_vg2 (0, w15, z18)) +/* +** write_za8_mf8_z18_0_w15: +** mova za0h\.b\[w15, 0:1\], {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w15, z18), + svwrite_hor_za8_mf8_vg2 (0, w15, z18)) + /* ** write_za8_s8_z23_0_w12p14: ** mov [^\n]+ @@ -71,6 +90,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4), svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4)) +/* +** write_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4), + svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4)) + /* ** write_za8_s8_z28_0_w12p2: ** mova za0h\.b\[w12, 2:3\], {z28\.b - z29\.b} @@ -90,6 +119,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0), svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0)) +/* +** write_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0), + svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0)) + /* ** write_za8_u8_z4_0_w15p12: ** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b} @@ -99,6 +138,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4), svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4)) +/* +** write_za8_mf8_z4_0_w15p12: +** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4), + svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4)) + /* ** 
write_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -109,6 +157,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28), svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28)) +/* +** write_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28), + svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28)) + /* ** write_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -129,6 +187,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4), svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4)) +/* +** write_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4), + svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4)) + /* ** write_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -138,3 +206,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t, svwrite_hor_za8_u8_vg2 (0, w16, z18), svwrite_hor_za8_u8_vg2 (0, w16, z18)) + +/* +** write_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** mova za0h\.b\[\1, 0:1\], {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t, + svwrite_hor_za8_mf8_vg2 (0, w16, z18), + svwrite_hor_za8_mf8_vg2 (0, w16, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c index e333ce699e3..70a2e95db96 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c @@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, 1, z4), svwrite_hor_za8_u8_vg4 (0, 1, z4)) +/* +** write_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, 1, z4), + svwrite_hor_za8_mf8_vg4 (0, 1, z4)) + /* ** write_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,19 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w15, z18), svwrite_hor_za8_u8_vg4 (0, w15, z18)) +/* +** write_za8_mf8_z18_0_w15: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za0h\.b\[w15, 0:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w15, z18), + svwrite_hor_za8_mf8_vg4 (0, w15, z18)) + /* ** write_za8_s8_z23_0_w12p12: ** mov [^\n]+ @@ -77,6 +100,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4), svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4)) +/* +** write_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4), + svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4)) + /* ** write_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -97,6 +130,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0), svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0)) +/* +** write_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0), + 
svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0)) + /* ** write_za8_u8_z0_0_w12p4: ** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b} @@ -106,6 +149,15 @@ TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0), svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0)) +/* +** write_za8_mf8_z0_0_w12p4: +** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0), + svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0)) + /* ** write_za8_u8_z4_0_w15p12: ** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b} @@ -115,6 +167,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4), svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4)) +/* +** write_za8_mf8_z4_0_w15p12: +** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4), + svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4)) + /* ** write_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -125,6 +186,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28), svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28)) +/* +** write_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28), + svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28)) + /* ** write_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -145,6 +216,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4), svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4)) +/* +** write_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4), + svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4)) + /* ** write_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -154,3 +235,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t, svwrite_hor_za8_u8_vg4 (0, w16, z28), svwrite_hor_za8_u8_vg4 (0, w16, z28)) + +/* +** write_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t, + svwrite_hor_za8_mf8_vg4 (0, w16, z28), + svwrite_hor_za8_mf8_vg4 (0, w16, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c index ce3dbdd8729..a576b753301 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c @@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, 1, z4), svwrite_ver_za8_u8_vg2 (0, 1, z4)) +/* +** write_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, 1, z4), + svwrite_ver_za8_mf8_vg2 (0, 1, z4)) + /* ** write_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -50,6 +60,15 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w15, z18), svwrite_ver_za8_u8_vg2 (0, w15, z18)) +/* +** write_za8_mf8_z18_0_w15: +** mova za0v\.b\[w15, 0:1\], {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w15, z18), + 
svwrite_ver_za8_mf8_vg2 (0, w15, z18)) + /* ** write_za8_s8_z23_0_w12p14: ** mov [^\n]+ @@ -71,6 +90,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4), svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4)) +/* +** write_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4), + svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4)) + /* ** write_za8_s8_z28_0_w12p2: ** mova za0v\.b\[w12, 2:3\], {z28\.b - z29\.b} @@ -90,6 +119,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0), svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0)) +/* +** write_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0), + svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0)) + /* ** write_za8_u8_z4_0_w15p12: ** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b} @@ -99,6 +138,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4), svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4)) +/* +** write_za8_mf8_z4_0_w15p12: +** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4), + svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4)) + /* ** write_za8_u8_z28_0_w12p15: ** add (w[0-9]+), w12, #?15 @@ -109,6 +157,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28), svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28)) +/* +** write_za8_mf8_z28_0_w12p15: +** add (w[0-9]+), w12, #?15 +** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28), + svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28)) + /* ** write_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -129,6 +187,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4), svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4)) +/* +** write_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4), + svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4)) + /* ** write_za8_u8_z18_0_w16: ** mov (w1[2-5]), w16 @@ -138,3 +206,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t, TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t, svwrite_ver_za8_u8_vg2 (0, w16, z18), svwrite_ver_za8_u8_vg2 (0, w16, z18)) + +/* +** write_za8_mf8_z18_0_w16: +** mov (w1[2-5]), w16 +** mova za0v\.b\[\1, 0:1\], {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t, + svwrite_ver_za8_mf8_vg2 (0, w16, z18), + svwrite_ver_za8_mf8_vg2 (0, w16, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c index 8972fed59e3..0444f80fa42 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c @@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, 1, z4), svwrite_ver_za8_u8_vg4 (0, 1, z4)) +/* +** write_za8_mf8_z4_0_1: +** mov (w1[2-5]), #?1 +** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_1, 
svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, 1, z4), + svwrite_ver_za8_mf8_vg4 (0, 1, z4)) + /* ** write_za8_s8_z28_0_w11: ** mov (w1[2-5]), w11 @@ -54,6 +64,19 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w15, z18), svwrite_ver_za8_u8_vg4 (0, w15, z18)) +/* +** write_za8_mf8_z18_0_w15: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za0v\.b\[w15, 0:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w15, z18), + svwrite_ver_za8_mf8_vg4 (0, w15, z18)) + /* ** write_za8_s8_z23_0_w12p12: ** mov [^\n]+ @@ -77,6 +100,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4), svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4)) +/* +** write_za8_mf8_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4), + svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4)) + /* ** write_za8_s8_z28_0_w12p2: ** add (w[0-9]+), w12, #?2 @@ -97,6 +130,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0), svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0)) +/* +** write_za8_mf8_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0), + svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0)) + /* ** write_za8_u8_z0_0_w12p4: ** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b} @@ -106,6 +149,15 @@ TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0), svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0)) +/* +** write_za8_mf8_z0_0_w12p4: +** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0), + svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0)) + /* ** write_za8_u8_z4_0_w15p12: ** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b} @@ -115,6 +167,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4), svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4)) +/* +** write_za8_mf8_z4_0_w15p12: +** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4), + svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4)) + /* ** write_za8_u8_z28_0_w12p14: ** add (w[0-9]+), w12, #?14 @@ -125,6 +186,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28), svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28)) +/* +** write_za8_mf8_z28_0_w12p14: +** add (w[0-9]+), w12, #?14 +** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28), + svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28)) + /* ** write_za8_s8_z0_0_w15p16: ** add (w[0-9]+), w15, #?16 @@ -145,6 +216,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4), svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4)) +/* +** write_za8_mf8_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4), + svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4)) + /* ** write_za8_u8_z28_0_w16: ** mov (w1[2-5]), w16 @@ -154,3 +235,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t, TEST_ZA_XN 
(write_za8_u8_z28_0_w16, svuint8x4_t, svwrite_ver_za8_u8_vg4 (0, w16, z28), svwrite_ver_za8_u8_vg4 (0, w16, z28)) + +/* +** write_za8_mf8_z28_0_w16: +** mov (w1[2-5]), w16 +** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t, + svwrite_ver_za8_mf8_vg4 (0, w16, z28), + svwrite_ver_za8_mf8_vg4 (0, w16, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c index 4b83a37edd2..836118b0fa7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c @@ -32,6 +32,16 @@ TEST_ZA_XN (write_w7_z0, svuint8x2_t, svwrite_za8_u8_vg1x2 (w7, z0), svwrite_za8_vg1x2 (w7, z0)) +/* +** write_mf8_w7_z0: +** mov (w8|w9|w10|w11), w7 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w7_z0, svmfloat8x2_t, + svwrite_za8_mf8_vg1x2 (w7, z0), + svwrite_za8_vg1x2 (w7, z0)) + /* ** write_w8_z0: ** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} @@ -61,6 +71,16 @@ TEST_ZA_XN (write_w12_z0, svuint8x2_t, svwrite_za8_u8_vg1x2 (w12, z0), svwrite_za8_vg1x2 (w12, z0)) +/* +** write_mf8_w12_z0: +** mov (w8|w9|w10|w11), w12 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w12_z0, svmfloat8x2_t, + svwrite_za8_mf8_vg1x2 (w12, z0), + svwrite_za8_vg1x2 (w12, z0)) + /* ** write_w8p7_z0: ** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} @@ -90,6 +110,16 @@ TEST_ZA_XN (write_w8m1_z0, svuint8x2_t, svwrite_za8_u8_vg1x2 (w8 - 1, z0), svwrite_za8_vg1x2 (w8 - 1, z0)) +/* +** write_mf8_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w8m1_z0, svmfloat8x2_t, + svwrite_za8_mf8_vg1x2 (w8 - 1, z0), + svwrite_za8_vg1x2 (w8 - 1, z0)) + /* ** write_w8_z18: ** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} @@ -99,6 +129,15 @@ TEST_ZA_XN (write_w8_z18, svuint8x2_t, svwrite_za8_u8_vg1x2 (w8, z18), svwrite_za8_vg1x2 (w8, z18)) +/* +** write_mf8_w8_z18: +** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x2_t, + svwrite_za8_mf8_vg1x2 (w8, z18), + svwrite_za8_vg1x2 (w8, z18)) + /* Leave the assembler to check for correctness for misaligned registers. 
*/ /* @@ -120,3 +159,12 @@ TEST_ZA_XN (write_w8_z23, svint8x2_t, TEST_ZA_XN (write_w8_z28, svuint8x2_t, svwrite_za8_u8_vg1x2 (w8, z28), svwrite_za8_vg1x2 (w8, z28)) + +/* +** write_mf8_w8_z28: +** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w8_z28, svmfloat8x2_t, + svwrite_za8_mf8_vg1x2 (w8, z28), + svwrite_za8_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c index a529bf9fcca..649a5c0ca63 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c @@ -22,6 +22,16 @@ TEST_ZA_XN (write_w0_z0, svuint8x4_t, svwrite_za8_u8_vg1x4 (w0, z0), svwrite_za8_vg1x4 (w0, z0)) +/* +** write_mf8_w0_z0: +** mov (w8|w9|w10|w11), w0 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w0_z0, svmfloat8x4_t, + svwrite_za8_mf8_vg1x4 (w0, z0), + svwrite_za8_vg1x4 (w0, z0)) + /* ** write_w7_z0: ** mov (w8|w9|w10|w11), w7 @@ -50,6 +60,14 @@ TEST_ZA_XN (write_w11_z0, svuint8x4_t, svwrite_za8_u8_vg1x4 (w11, z0), svwrite_za8_vg1x4 (w11, z0)) +/* +** write_mf8_w11_z0: +** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w11_z0, svmfloat8x4_t, + svwrite_za8_mf8_vg1x4 (w11, z0), + svwrite_za8_vg1x4 (w11, z0)) /* ** write_w12_z0: @@ -80,6 +98,16 @@ TEST_ZA_XN (write_w8p8_z0, svuint8x4_t, svwrite_za8_u8_vg1x4 (w8 + 8, z0), svwrite_za8_vg1x4 (w8 + 8, z0)) +/* +** write_mf8_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_mf8_w8p8_z0, svmfloat8x4_t, + svwrite_za8_mf8_vg1x4 (w8 + 8, z0), + svwrite_za8_vg1x4 (w8 + 8, z0)) + /* ** write_w8m1_z0: ** sub (w8|w9|w10|w11), w8, #?1 @@ -114,6 +142,19 @@ TEST_ZA_XN (write_w8_z18, svuint8x4_t, svwrite_za8_u8_vg1x4 (w8, z18), svwrite_za8_vg1x4 (w8, z18)) +/* +** write_mf8_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x4_t, + svwrite_za8_mf8_vg1x4 (w8, z18), + svwrite_za8_vg1x4 (w8, z18)) + /* ** write_w8_z23: ** mov [^\n]+ @@ -127,6 +168,19 @@ TEST_ZA_XN (write_w8_z23, svuint8x4_t, svwrite_za8_u8_vg1x4 (w8, z23), svwrite_za8_vg1x4 (w8, z23)) +/* +** write_mf8_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_mf8_w8_z23, svmfloat8x4_t, + svwrite_za8_mf8_vg1x4 (w8, z23), + svwrite_za8_vg1x4 (w8, z23)) + /* ** write_w8_z28: ** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c new file mode 100644 index 00000000000..834a0e680a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z0_z0, svmfloat8x2_t, z0, + svzip_mf8_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (zip_z0_z4, svmfloat8x2_t, z0, + svzip_mf8_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (zip_z4_z18, svmfloat8x2_t, z4, + svzip_mf8_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.b - z19\.b}, 
z23\.b, z24\.b +** ret +*/ +TEST_XN (zip_z18_z23, svmfloat8x2_t, z18, + svzip_mf8_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svmfloat8x2_t, z23, + svzip_mf8_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z28_z0, svmfloat8x2_t, z28, + svzip_mf8_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: { xfail aarch64_big_endian } +** zip {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (zip_z28_z0_z23, svmfloat8x2_t, z28, + svzip_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (zip_z28_z5_z19, svmfloat8x2_t, z28, + svzip_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c new file mode 100644 index 00000000000..487e9b2d3fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z0_z0, svmfloat8x4_t, z0, + svzip_mf8_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (zip_z0_z4, svmfloat8x4_t, z0, + svzip_mf8_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svmfloat8x4_t, z4, + svzip_mf8_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svmfloat8x4_t, z18, + svzip_mf8_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svmfloat8x4_t, z23, + svzip_mf8_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z28_z0, svmfloat8x4_t, z28, + svzip_mf8_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c new file mode 100644 index 00000000000..4dd4753461a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svmfloat8x2_t, z0, + svzipq_mf8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svmfloat8x2_t, z0, + svzipq_mf8_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svmfloat8x2_t, z4, + svzipq_mf8_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svmfloat8x2_t, z18, + svzipq_mf8_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret 
+*/ +TEST_XN (zipq_z23_z28, svmfloat8x2_t, z23, + svzipq_mf8_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svmfloat8x2_t, z28, + svzipq_mf8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: { xfail aarch64_big_endian } +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svmfloat8x2_t, z28, + svzipq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svmfloat8x2_t, z28, + svzipq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c new file mode 100644 index 00000000000..417eb387e4b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svmfloat8x4_t, z0, + svzipq_mf8_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svmfloat8x4_t, z0, + svzipq_mf8_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svmfloat8x4_t, z4, + svzipq_mf8_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svmfloat8x4_t, z18, + svzipq_mf8_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svmfloat8x4_t, z23, + svzipq_mf8_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svmfloat8x4_t, z28, + svzipq_mf8_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c new file mode 100644 index 00000000000..d4073ab279d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c @@ -0,0 +1,269 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** ld1_mf8_base: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_index: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_mf8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()), + z0 = svld1_x2 (pn8, x0 + svcntb ())) + +/* +** ld1_mf8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2), + z0 = svld1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ld1_mf8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14), + z0 = svld1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16), + z0 = svld1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()), + z0 = svld1_x2 (pn8, x0 - svcntb ())) + +/* +** ld1_mf8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2), + z0 = svld1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ld1_mf8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16), + z0 = svld1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ld1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18), + z0 = svld1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ld1_mf8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t, + z17 = svld1_mf8_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_z22: +** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t, + z22 = svld1_mf8_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_z28: +** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t, + z28 = svld1_mf8_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_mf8_pn15: +** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + z0 = svld1_mf8_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_mf8_0: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 
(pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_mf8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_mf8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_mf8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_mf8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + z0 = svld1_vnum_mf8_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c new file mode 100644 index 00000000000..84d053a4261 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c @@ -0,0 +1,361 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** ld1_mf8_base: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_mf8_index: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()), + z0 = svld1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2), + z0 = svld1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3), + z0 = svld1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ld1_mf8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4), + z0 = svld1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ld1_mf8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28), + z0 = svld1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ld1_mf8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32), + z0 = svld1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()), + z0 = svld1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_mf8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2), + z0 = svld1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ld1_mf8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_mf8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_mf8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_mf8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_mf8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svld1_mf8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svld1_mf8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svld1_mf8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_mf8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_mf8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_mf8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_mf8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_mf8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_mf8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_mf8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_mf8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_mf8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + z0 = svld1_vnum_mf8_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c new file mode 100644 index 00000000000..60d2caa1568 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c @@ -0,0 +1,269 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** ldnt1_mf8_base: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_index: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()), + z0 = svldnt1_x2 (pn8, x0 + svcntb ())) + +/* +** ldnt1_mf8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ldnt1_mf8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()), + z0 = svldnt1_x2 (pn8, x0 - svcntb ())) + +/* +** ldnt1_mf8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ldnt1_mf8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ldnt1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ldnt1_mf8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t, + z17 = svldnt1_mf8_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_z22: +** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t, + z22 = svldnt1_mf8_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_z28: +** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t, + z28 = svldnt1_mf8_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ 
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_mf8_pn15: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_mf8_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_mf8_0: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_mf8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_mf8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_mf8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_mf8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_mf8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c new file mode 100644 index 00000000000..976b1e6f61c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c @@ -0,0 +1,361 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** ldnt1_mf8_base: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_mf8_index: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()), + z0 = svldnt1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_mf8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ldnt1_mf8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_mf8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_mf8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_mf8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_mf8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_mf8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_mf8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_mf8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svldnt1_mf8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svldnt1_mf8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svldnt1_mf8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, 
mfloat8_t, + z0 = svldnt1_mf8_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_mf8_pn15: +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_mf8_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_mf8_0: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_mf8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_mf8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_mf8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_mf8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_mf8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_mf8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_mf8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_mf8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c new file mode 100644 index 00000000000..64d08509c16 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c @@ -0,0 +1,80 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" + +/* +** revd_mf8_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t, + z0 = svrevd_mf8_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_mf8_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t, + z0 = svrevd_mf8_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_mf8_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t, + z0 = svrevd_mf8_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_mf8_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t, + z0 = svrevd_mf8_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. */ +TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t, + z0 = svrevd_mf8_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_mf8_z_untied: +** movi? 
[vdz]0\.?(?:[0-9]*[bhsd])?, #?0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t, + z0 = svrevd_mf8_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_mf8_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t, + z0 = svrevd_mf8_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_mf8_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t, + z0 = svrevd_mf8_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c new file mode 100644 index 00000000000..489e4fff54d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c @@ -0,0 +1,269 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** stnt1_mf8_base: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_mf8_index: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* +** stnt1_mf8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** stnt1_mf8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0), + svstnt1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0), + svstnt1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_mf8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* +** stnt1_mf8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** stnt1_mf8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0), + svstnt1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** stnt1_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0), + svstnt1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** stnt1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_mf8_z22: +** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_mf8_z28: +** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_mf8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_mf8_pn15: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t, + svstnt1_mf8_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_mf8_0: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_mf8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_mf8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_mf8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_mf8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_mf8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_mf8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t, + svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c new file mode 100644 index 00000000000..4be364514ab --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c @@ -0,0 +1,361 @@ +/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p1" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** stnt1_mf8_base: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_mf8_index: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_mf8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0), + svstnt1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** stnt1_mf8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0), + svstnt1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** stnt1_mf8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0), + svstnt1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** stnt1_mf8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0), + svstnt1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_mf8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0), + svstnt1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** stnt1_mf8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0), + svstnt1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** stnt1_mf8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0), + svstnt1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** stnt1_mf8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0), + svstnt1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** stnt1_mf8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_mf8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_mf8_z28: +** stnt1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_mf8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_mf8_pn7: 
+** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_mf8_pn15: +** stnt1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t, + svstnt1_mf8_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_mf8_0: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_mf8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_mf8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_mf8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_mf8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_mf8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_mf8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_mf8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_mf8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_mf8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t, + svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0))