#define TYPES_d_za(S, D) \
S (za64)
-/* { _za8 } x { _s8 _u8 }
+/* { _za8 } x { _mf8 _s8 _u8 }
{ _za16 } x { _bf16 _f16 _s16 _u16 }
{ _za64 } x { _f64 _s64 _u64 }. */
#define TYPES_za_bhsd_data(S, D) \
- D (za8, s8), D (za8, u8), \
+ D (za8, mf8), D (za8, s8), D (za8, u8), \
D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \
D (za32, f32), D (za32, s32), D (za32, u32), \
D (za64, f64), D (za64, s64), D (za64, u64)
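For orientation, the list above is an X-macro: a consumer supplies the S and D callbacks and the macro expands one D invocation per (tile, element-type) pair, so the new _mf8 entry rides along automatically wherever the list is used. A minimal sketch of such a consumer (hypothetical; the compiler's real callbacks in aarch64-sve-builtins.cc differ in detail):

/* Hypothetical consumer; S happens to be unused by this particular list.  */
#define D(ZA, TYPE) { #ZA, #TYPE }
static const char *const za_bhsd_pairs[][2] = {
  TYPES_za_bhsd_data (S, D)	/* first entry is now { "za8", "mf8" } */
};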
z0 = svread_hor_za128_u8_m (z0, p0, 0, w0),
z0 = svread_hor_za128_m (z0, p0, 0, w0))
+/*
+** read_za128_mf8_0_w0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t,
+ z0 = svread_hor_za128_mf8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
/*
** read_za128_u8_0_w0_untied:
** (
z0 = svread_hor_za128_u8_m (z1, p0, 0, w0),
z0 = svread_hor_za128_m (z1, p0, 0, w0))
+/*
+** read_za128_mf8_0_w0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** |
+** mov (w1[2-5]), w0
+** mova z1\.q, p0/m, za0h\.q\[\3, 0\]
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t,
+ z0 = svread_hor_za128_mf8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
/*
** read_za128_s16_0_w0_tied:
** mov (w1[2-5]), w0
z0 = svread_hor_za8_u8_m (z0, p0, 0, w0),
z0 = svread_hor_za8_m (z0, p0, 0, w0))
+/*
+** read_za8_mf8_0_w0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t,
+ z0 = svread_hor_za8_mf8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0))
+
/*
** read_za8_u8_0_w0_untied:
** (
TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t,
z0 = svread_hor_za8_u8_m (z1, p0, 0, w0),
z0 = svread_hor_za8_m (z1, p0, 0, w0))
+
+/*
+** read_za8_mf8_0_w0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\2, 0\]
+** |
+** mov (w1[2-5]), w0
+** mova z1\.b, p0/m, za0h\.b\[\3, 0\]
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t,
+ z0 = svread_hor_za8_mf8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za8_m (z1, p0, 0, w0))
z0 = svread_ver_za128_u8_m (z0, p0, 0, w0),
z0 = svread_ver_za128_m (z0, p0, 0, w0))
+/*
+** read_za128_mf8_0_w0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t,
+ z0 = svread_ver_za128_mf8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
/*
** read_za128_u8_0_w0_untied:
** (
z0 = svread_ver_za128_u8_m (z1, p0, 0, w0),
z0 = svread_ver_za128_m (z1, p0, 0, w0))
+/*
+** read_za128_mf8_0_w0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** |
+** mov (w1[2-5]), w0
+** mova z1\.q, p0/m, za0v\.q\[\3, 0\]
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t,
+ z0 = svread_ver_za128_mf8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
/*
** read_za128_s16_0_w0_tied:
** mov (w1[2-5]), w0
z0 = svread_ver_za8_u8_m (z0, p0, 0, w0),
z0 = svread_ver_za8_m (z0, p0, 0, w0))
+/*
+** read_za8_mf8_0_w0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t,
+ z0 = svread_ver_za8_mf8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0))
+
/*
** read_za8_u8_0_w0_untied:
** (
TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t,
z0 = svread_ver_za8_u8_m (z1, p0, 0, w0),
z0 = svread_ver_za8_m (z1, p0, 0, w0))
+
+/*
+** read_za8_mf8_0_w0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\2, 0\]
+** |
+** mov (w1[2-5]), w0
+** mova z1\.b, p0/m, za0v\.b\[\3, 0\]
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t,
+ z0 = svread_ver_za8_mf8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za8_m (z1, p0, 0, w0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_mf8_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t,
+ z0 = svrevd_mf8_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_mf8_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_mf8_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t,
+ z0 = svrevd_mf8_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_mf8_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t,
+ z0 = svrevd_mf8_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_mf8_z_untied:
+** movi? [vdz]0\.?(?:[0-9]*[bhsd])?, #?0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t,
+ z0 = svrevd_mf8_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_mf8_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_mf8_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t,
+ z0 = svrevd_mf8_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
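The _m/_z/_x cases above exercise the three standard ACLE predication forms; as a compact reminder of their semantics (a sketch under those standard semantics, not something this patch defines):

/* REVD reverses the two doublewords within each 128-bit granule of the
   active lanes; the suffix only decides what the inactive lanes hold.  */
void
revd_forms (svbool_t pg, svmfloat8_t fallback, svmfloat8_t src) __arm_streaming
{
  svmfloat8_t m = svrevd_mf8_m (fallback, pg, src); /* inactive lanes from fallback */
  svmfloat8_t z = svrevd_mf8_z (pg, src);           /* inactive lanes zeroed */
  svmfloat8_t x = svrevd_mf8_x (pg, src);           /* inactive lanes unspecified */
}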
svwrite_hor_za128_u8_m (0, w0, p0, z0),
svwrite_hor_za128_m (0, w0, p0, z0))
+/*
+** write_za128_mf8_0_w0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t,
+ svwrite_hor_za128_mf8_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
/*
** write_za128_s16_0_w0_z0:
** mov (w1[2-5]), w0
TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t,
svwrite_hor_za8_u8_m (0, w0, p0, z0),
svwrite_hor_za8_m (0, w0, p0, z0))
+
+/*
+** write_za8_mf8_0_w0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t,
+ svwrite_hor_za8_mf8_m (0, w0, p0, z0),
+ svwrite_hor_za8_m (0, w0, p0, z0))
svwrite_ver_za128_u8_m (0, w0, p0, z0),
svwrite_ver_za128_m (0, w0, p0, z0))
+/*
+** write_za128_mf8_0_w0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t,
+ svwrite_ver_za128_mf8_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
/*
** write_za128_s16_0_w0_z0:
** mov (w1[2-5]), w0
TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t,
svwrite_ver_za8_u8_m (0, w0, p0, z0),
svwrite_ver_za8_m (0, w0, p0, z0))
+
+/*
+** write_za8_mf8_0_w0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t,
+ svwrite_ver_za8_mf8_m (0, w0, p0, z0),
+ svwrite_ver_za8_m (0, w0, p0, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_mf8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_mf8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_mf8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_mf8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_mf8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_mf8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ z17 = svld1_mf8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ z22 = svld1_mf8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ z28 = svld1_mf8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_mf8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_mf8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_mf8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_mf8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_mf8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
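For readers without the harness to hand: each TEST_LOAD_COUNT case above boils down to a streaming function along the lines of the sketch below (a sketch only; the harness's real macro in test_sme2_acle.h adds extra plumbing, and building it needs an SME2/FP8-enabled toolchain). The _vnum forms take their offset in whole vector-lengths, which is why small immediates fold into the [x0, #N, mul vl] addressing mode while out-of-range ones fall back to explicit address arithmetic.

#include <arm_sve.h>

/* Sketch of what ld1_mf8_base above effectively compiles as; the
   expected body is "ld1b {z0.b - z1.b}, pn8/z, [x0]".  */
svmfloat8x2_t
ld1_mf8_base_sketch (svcount_t pn8, const mfloat8_t *x0) __arm_streaming
{
  return svld1_mf8_x2 (pn8, x0);
}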
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_mf8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_mf8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_mf8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_mf8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_mf8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_mf8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_mf8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_mf8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svld1_mf8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svld1_mf8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svld1_mf8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_mf8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_mf8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_mf8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_mf8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_mf8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_mf8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_mf8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_mf8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_mf8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_mf8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_mf8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_mf8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_mf8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ z17 = svldnt1_mf8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ z22 = svldnt1_mf8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ z28 = svldnt1_mf8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_mf8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_mf8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_mf8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_mf8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_mf8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_mf8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_mf8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_mf8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_mf8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_mf8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_mf8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_mf8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_mf8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_mf8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svldnt1_mf8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svldnt1_mf8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svldnt1_mf8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_mf8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_mf8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_mf8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_mf8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_mf8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_mf8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_mf8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_mf8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
z4 = svread_hor_za8_u8_vg2 (0, 1),
z4 = svread_hor_za8_u8_vg2 (0, 1))
+/*
+** read_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t,
+ z4 = svread_hor_za8_mf8_vg2 (0, 1),
+ z4 = svread_hor_za8_mf8_vg2 (0, 1))
+
/*
** read_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svread_hor_za8_u8_vg2 (0, w15),
z18 = svread_hor_za8_u8_vg2 (0, w15))
+/*
+** read_za8_mf8_z18_0_w15:
+** mova {z18\.b - z19\.b}, za0h\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ z18 = svread_hor_za8_mf8_vg2 (0, w15),
+ z18 = svread_hor_za8_mf8_vg2 (0, w15))
+
/*
** read_za8_s8_z23_0_w12p14:
** mova {[^\n]+}, za0h\.b\[w12, 14:15\]
z4 = svread_hor_za8_u8_vg2 (0, w12 + 1),
z4 = svread_hor_za8_u8_vg2 (0, w12 + 1))
+/*
+** read_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1),
+ z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1))
+
/*
** read_za8_s8_z28_0_w12p2:
** mova {z28\.b - z29\.b}, za0h\.b\[w12, 2:3\]
z0 = svread_hor_za8_u8_vg2 (0, w15 + 3),
z0 = svread_hor_za8_u8_vg2 (0, w15 + 3))
+/*
+** read_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3),
+ z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3))
+
/*
** read_za8_u8_z4_0_w15p12:
** mova {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
z4 = svread_hor_za8_u8_vg2 (0, w15 + 12),
z4 = svread_hor_za8_u8_vg2 (0, w15 + 12))
+/*
+** read_za8_mf8_z4_0_w15p12:
+** mova {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12),
+ z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12))
+
/*
** read_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
z28 = svread_hor_za8_u8_vg2 (0, w12 + 15),
z28 = svread_hor_za8_u8_vg2 (0, w12 + 15))
+/*
+** read_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15),
+ z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15))
+
/*
** read_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svread_hor_za8_u8_vg2 (0, w12 - 1),
z4 = svread_hor_za8_u8_vg2 (0, w12 - 1))
+/*
+** read_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1),
+ z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1))
+
/*
** read_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
z18 = svread_hor_za8_u8_vg2 (0, w16),
z18 = svread_hor_za8_u8_vg2 (0, w16))
+
+/*
+** read_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.b - z19\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ z18 = svread_hor_za8_mf8_vg2 (0, w16),
+ z18 = svread_hor_za8_mf8_vg2 (0, w16))
z4 = svread_hor_za8_u8_vg4 (0, 1),
z4 = svread_hor_za8_u8_vg4 (0, 1))
+/*
+** read_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t,
+ z4 = svread_hor_za8_mf8_vg4 (0, 1),
+ z4 = svread_hor_za8_mf8_vg4 (0, 1))
+
/*
** read_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svread_hor_za8_u8_vg4 (0, w15),
z18 = svread_hor_za8_u8_vg4 (0, w15))
+/*
+** read_za8_mf8_z18_0_w15:
+** mova {[^\n]+}, za0h\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ z18 = svread_hor_za8_mf8_vg4 (0, w15),
+ z18 = svread_hor_za8_mf8_vg4 (0, w15))
+
/*
** read_za8_s8_z23_0_w12p12:
** mova {[^\n]+}, za0h\.b\[w12, 12:15\]
z4 = svread_hor_za8_u8_vg4 (0, w12 + 1),
z4 = svread_hor_za8_u8_vg4 (0, w12 + 1))
+/*
+** read_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ z4 = svread_hor_za8_mf8_vg4 (0, w12 + 1),
+ z4 = svread_hor_za8_mf8_vg4 (0, w12 + 1))
+
/*
** read_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
z0 = svread_hor_za8_u8_vg4 (0, w15 + 3),
z0 = svread_hor_za8_u8_vg4 (0, w15 + 3))
+/*
+** read_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3),
+ z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3))
+
/*
** read_za8_u8_z0_0_w12p4:
** mova {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
z0 = svread_hor_za8_u8_vg4 (0, w12 + 4),
z0 = svread_hor_za8_u8_vg4 (0, w12 + 4))
+/*
+** read_za8_mf8_z0_0_w12p4:
+** mova {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4),
+ z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4))
+
/*
** read_za8_u8_z4_0_w15p12:
** mova {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
z4 = svread_hor_za8_u8_vg4 (0, w15 + 12),
z4 = svread_hor_za8_u8_vg4 (0, w15 + 12))
+/*
+** read_za8_mf8_z4_0_w15p12:
+** mova {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12),
+ z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12))
+
/*
** read_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
z28 = svread_hor_za8_u8_vg4 (0, w12 + 14),
z28 = svread_hor_za8_u8_vg4 (0, w12 + 14))
+/*
+** read_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14),
+ z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14))
+
/*
** read_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svread_hor_za8_u8_vg4 (0, w12 - 1),
z4 = svread_hor_za8_u8_vg4 (0, w12 - 1))
+/*
+** read_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1),
+ z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1))
+
/*
** read_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
z28 = svread_hor_za8_u8_vg4 (0, w16),
z28 = svread_hor_za8_u8_vg4 (0, w16))
+
+/*
+** read_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ z28 = svread_hor_za8_mf8_vg4 (0, w16),
+ z28 = svread_hor_za8_mf8_vg4 (0, w16))
z4 = svread_ver_za8_u8_vg2 (0, 1),
z4 = svread_ver_za8_u8_vg2 (0, 1))
+/*
+** read_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t,
+ z4 = svread_ver_za8_mf8_vg2 (0, 1),
+ z4 = svread_ver_za8_mf8_vg2 (0, 1))
+
/*
** read_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svread_ver_za8_u8_vg2 (0, w15),
z18 = svread_ver_za8_u8_vg2 (0, w15))
+/*
+** read_za8_mf8_z18_0_w15:
+** mova {z18\.b - z19\.b}, za0v\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ z18 = svread_ver_za8_mf8_vg2 (0, w15),
+ z18 = svread_ver_za8_mf8_vg2 (0, w15))
+
/*
** read_za8_s8_z23_0_w12p14:
** mova {[^\n]+}, za0v\.b\[w12, 14:15\]
z4 = svread_ver_za8_u8_vg2 (0, w12 + 1),
z4 = svread_ver_za8_u8_vg2 (0, w12 + 1))
+/*
+** read_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1),
+ z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1))
+
/*
** read_za8_s8_z28_0_w12p2:
** mova {z28\.b - z29\.b}, za0v\.b\[w12, 2:3\]
z0 = svread_ver_za8_u8_vg2 (0, w15 + 3),
z0 = svread_ver_za8_u8_vg2 (0, w15 + 3))
+/*
+** read_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3),
+ z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3))
+
/*
** read_za8_u8_z4_0_w15p12:
** mova {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
z4 = svread_ver_za8_u8_vg2 (0, w15 + 12),
z4 = svread_ver_za8_u8_vg2 (0, w15 + 12))
+/*
+** read_za8_mf8_z4_0_w15p12:
+** mova {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12),
+ z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12))
+
/*
** read_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
z28 = svread_ver_za8_u8_vg2 (0, w12 + 15),
z28 = svread_ver_za8_u8_vg2 (0, w12 + 15))
+/*
+** read_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15),
+ z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15))
+
/*
** read_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svread_ver_za8_u8_vg2 (0, w12 - 1),
z4 = svread_ver_za8_u8_vg2 (0, w12 - 1))
+/*
+** read_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1),
+ z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1))
+
/*
** read_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
z18 = svread_ver_za8_u8_vg2 (0, w16),
z18 = svread_ver_za8_u8_vg2 (0, w16))
+
+/*
+** read_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.b - z19\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ z18 = svread_ver_za8_mf8_vg2 (0, w16),
+ z18 = svread_ver_za8_mf8_vg2 (0, w16))
z4 = svread_ver_za8_u8_vg4 (0, 1),
z4 = svread_ver_za8_u8_vg4 (0, 1))
+/*
+** read_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t,
+ z4 = svread_ver_za8_mf8_vg4 (0, 1),
+ z4 = svread_ver_za8_mf8_vg4 (0, 1))
+
/*
** read_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svread_ver_za8_u8_vg4 (0, w15),
z18 = svread_ver_za8_u8_vg4 (0, w15))
+/*
+** read_za8_mf8_z18_0_w15:
+** mova {[^\n]+}, za0v\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ z18 = svread_ver_za8_mf8_vg4 (0, w15),
+ z18 = svread_ver_za8_mf8_vg4 (0, w15))
+
/*
** read_za8_s8_z23_0_w12p12:
** mova {[^\n]+}, za0v\.b\[w12, 12:15\]
z4 = svread_ver_za8_u8_vg4 (0, w12 + 1),
z4 = svread_ver_za8_u8_vg4 (0, w12 + 1))
+/*
+** read_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1),
+ z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1))
+
/*
** read_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
z0 = svread_ver_za8_u8_vg4 (0, w15 + 3),
z0 = svread_ver_za8_u8_vg4 (0, w15 + 3))
+/*
+** read_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3),
+ z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3))
+
/*
** read_za8_u8_z0_0_w12p4:
** mova {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
z0 = svread_ver_za8_u8_vg4 (0, w12 + 4),
z0 = svread_ver_za8_u8_vg4 (0, w12 + 4))
+/*
+** read_za8_mf8_z0_0_w12p4:
+** mova {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4),
+ z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4))
+
/*
** read_za8_u8_z4_0_w15p12:
** mova {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
z4 = svread_ver_za8_u8_vg4 (0, w15 + 12),
z4 = svread_ver_za8_u8_vg4 (0, w15 + 12))
+/*
+** read_za8_mf8_z4_0_w15p12:
+** mova {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12),
+ z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12))
+
/*
** read_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
z28 = svread_ver_za8_u8_vg4 (0, w12 + 14),
z28 = svread_ver_za8_u8_vg4 (0, w12 + 14))
+/*
+** read_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ z28 = svread_ver_za8_mf8_vg4 (0, w12 + 14),
+ z28 = svread_ver_za8_mf8_vg4 (0, w12 + 14))
+
/*
** read_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svread_ver_za8_u8_vg4 (0, w12 - 1),
z4 = svread_ver_za8_u8_vg4 (0, w12 - 1))
+/*
+** read_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1),
+ z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1))
+
/*
** read_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
z28 = svread_ver_za8_u8_vg4 (0, w16),
z28 = svread_ver_za8_u8_vg4 (0, w16))
+
+/*
+** read_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ z28 = svread_ver_za8_mf8_vg4 (0, w16),
+ z28 = svread_ver_za8_mf8_vg4 (0, w16))
z0 = svread_za8_u8_vg1x2 (w7),
z0 = svread_za8_u8_vg1x2 (w7))
+/*
+** read_mf8_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w7_z0, svmfloat8x2_t,
+ z0 = svread_za8_mf8_vg1x2 (w7),
+ z0 = svread_za8_mf8_vg1x2 (w7))
+
/*
** read_w8_z0:
** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
z0 = svread_za8_u8_vg1x2 (w12),
z0 = svread_za8_u8_vg1x2 (w12))
+/*
+** read_mf8_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w12_z0, svmfloat8x2_t,
+ z0 = svread_za8_mf8_vg1x2 (w12),
+ z0 = svread_za8_mf8_vg1x2 (w12))
+
/*
** read_w8p7_z0:
** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
z0 = svread_za8_u8_vg1x2 (w8 - 1),
z0 = svread_za8_u8_vg1x2 (w8 - 1))
+/*
+** read_mf8_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8m1_z0, svmfloat8x2_t,
+ z0 = svread_za8_mf8_vg1x2 (w8 - 1),
+ z0 = svread_za8_mf8_vg1x2 (w8 - 1))
+
/*
** read_w8_z18:
** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
z18 = svread_za8_u8_vg1x2 (w8),
z18 = svread_za8_u8_vg1x2 (w8))
+/*
+** read_mf8_w8_z18:
+** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x2_t,
+ z18 = svread_za8_mf8_vg1x2 (w8),
+ z18 = svread_za8_mf8_vg1x2 (w8))
+
/* Leave the assembler to check for correctness for misaligned registers. */
/*
TEST_READ_ZA_XN (read_w8_z28, svuint8x2_t,
z28 = svread_za8_u8_vg1x2 (w8),
z28 = svread_za8_u8_vg1x2 (w8))
+
+/*
+** read_mf8_w8_z28:
+** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8_z28, svmfloat8x2_t,
+ z28 = svread_za8_mf8_vg1x2 (w8),
+ z28 = svread_za8_mf8_vg1x2 (w8))
z0 = svread_za8_u8_vg1x4 (w0),
z0 = svread_za8_u8_vg1x4 (w0))
+/*
+** read_mf8_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w0_z0, svmfloat8x4_t,
+ z0 = svread_za8_mf8_vg1x4 (w0),
+ z0 = svread_za8_mf8_vg1x4 (w0))
+
/*
** read_w7_z0:
** mov (w8|w9|w10|w11), w7
z0 = svread_za8_u8_vg1x4 (w11),
z0 = svread_za8_u8_vg1x4 (w11))
+/*
+** read_mf8_w11_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w11_z0, svmfloat8x4_t,
+ z0 = svread_za8_mf8_vg1x4 (w11),
+ z0 = svread_za8_mf8_vg1x4 (w11))
+
/*
** read_w12_z0:
z0 = svread_za8_u8_vg1x4 (w8 + 8),
z0 = svread_za8_u8_vg1x4 (w8 + 8))
+/*
+** read_mf8_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8p8_z0, svmfloat8x4_t,
+ z0 = svread_za8_mf8_vg1x4 (w8 + 8),
+ z0 = svread_za8_mf8_vg1x4 (w8 + 8))
+
/*
** read_w8m1_z0:
** sub (w8|w9|w10|w11), w8, #?1
z18 = svread_za8_u8_vg1x4 (w8),
z18 = svread_za8_u8_vg1x4 (w8))
+/*
+** read_mf8_w8_z18:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x4_t,
+ z18 = svread_za8_mf8_vg1x4 (w8),
+ z18 = svread_za8_mf8_vg1x4 (w8))
+
/*
** read_w8_z23:
** mova [^\n]+, za\.d\[w8, 0, vgx4\]
z23 = svread_za8_u8_vg1x4 (w8),
z23 = svread_za8_u8_vg1x4 (w8))
+/*
+** read_mf8_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_mf8_w8_z23, svmfloat8x4_t,
+ z23 = svread_za8_mf8_vg1x4 (w8),
+ z23 = svread_za8_mf8_vg1x4 (w8))
+
/*
** read_w8_z28:
** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
z0 = svreadz_hor_za128_u8 (0, w0),
z0 = svreadz_hor_za128_u8 (0, w0))
+/*
+** readz_za128_mf8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t,
+ z0 = svreadz_hor_za128_mf8 (0, w0),
+ z0 = svreadz_hor_za128_mf8 (0, w0))
+
/*
** readz_za128_s16_0_w0:
** mov (w1[2-5]), w0
TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t,
z0 = svreadz_hor_za8_u8 (0, w0),
z0 = svreadz_hor_za8_u8 (0, w0))
+
+/*
+** readz_za8_mf8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.b, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t,
+ z0 = svreadz_hor_za8_mf8 (0, w0),
+ z0 = svreadz_hor_za8_mf8 (0, w0))
z4 = svreadz_hor_za8_u8_vg2 (0, 1),
z4 = svreadz_hor_za8_u8_vg2 (0, 1))
+/*
+** readz_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t,
+ z4 = svreadz_hor_za8_mf8_vg2 (0, 1),
+ z4 = svreadz_hor_za8_mf8_vg2 (0, 1))
+
/*
** readz_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svreadz_hor_za8_u8_vg2 (0, w15),
z18 = svreadz_hor_za8_u8_vg2 (0, w15))
+/*
+** readz_za8_mf8_z18_0_w15:
+** movaz {z18\.b - z19\.b}, za0h\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ z18 = svreadz_hor_za8_mf8_vg2 (0, w15),
+ z18 = svreadz_hor_za8_mf8_vg2 (0, w15))
+
/*
** readz_za8_s8_z23_0_w12p14:
** movaz {[^\n]+}, za0h\.b\[w12, 14:15\]
z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1),
z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1))
+/*
+** readz_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1),
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1))
+
/*
** readz_za8_s8_z28_0_w12p2:
** movaz {z28\.b - z29\.b}, za0h\.b\[w12, 2:3\]
z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3),
z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3))
+/*
+** readz_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** movaz {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3),
+ z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3))
+
/*
** readz_za8_u8_z4_0_w15p12:
** movaz {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12),
z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12))
+/*
+** readz_za8_mf8_z4_0_w15p12:
+** movaz {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12),
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12))
+
/*
** readz_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
z28 = svreadz_hor_za8_u8_vg2 (0, w12 + 15),
z28 = svreadz_hor_za8_u8_vg2 (0, w12 + 15))
+/*
+** readz_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** movaz {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15),
+ z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15))
+
/*
** readz_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1),
z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1))
+/*
+** readz_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** movaz {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1),
+ z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1))
+
/*
** readz_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t,
z18 = svreadz_hor_za8_u8_vg2 (0, w16),
z18 = svreadz_hor_za8_u8_vg2 (0, w16))
+
+/*
+** readz_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** movaz {z18\.b - z19\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ z18 = svreadz_hor_za8_mf8_vg2 (0, w16),
+ z18 = svreadz_hor_za8_mf8_vg2 (0, w16))
z4 = svreadz_hor_za8_u8_vg4 (0, 1),
z4 = svreadz_hor_za8_u8_vg4 (0, 1))
+/*
+** readz_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t,
+ z4 = svreadz_hor_za8_mf8_vg4 (0, 1),
+ z4 = svreadz_hor_za8_mf8_vg4 (0, 1))
+
/*
** readz_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svreadz_hor_za8_u8_vg4 (0, w15),
z18 = svreadz_hor_za8_u8_vg4 (0, w15))
+/*
+** readz_za8_mf8_z18_0_w15:
+** movaz {[^\n]+}, za0h\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ z18 = svreadz_hor_za8_mf8_vg4 (0, w15),
+ z18 = svreadz_hor_za8_mf8_vg4 (0, w15))
+
/*
** readz_za8_s8_z23_0_w12p12:
** movaz {[^\n]+}, za0h\.b\[w12, 12:15\]
z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1),
z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1))
+/*
+** readz_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1),
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1))
+
/*
** readz_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3),
z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3))
+/*
+** readz_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** movaz {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3),
+ z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3))
+
/*
** readz_za8_u8_z0_0_w12p4:
** movaz {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
z0 = svreadz_hor_za8_u8_vg4 (0, w12 + 4),
z0 = svreadz_hor_za8_u8_vg4 (0, w12 + 4))
+/*
+** readz_za8_mf8_z0_0_w12p4:
+** movaz {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4),
+ z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4))
+
/*
** readz_za8_u8_z4_0_w15p12:
** movaz {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12),
z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12))
+/*
+** readz_za8_mf8_z4_0_w15p12:
+** movaz {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12),
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12))
+
/*
** readz_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14),
z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14))
+/*
+** readz_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** movaz {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14),
+ z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14))
+
/*
** readz_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1),
z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1))
+/*
+** readz_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** movaz {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1),
+ z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1))
+
/*
** readz_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t,
z28 = svreadz_hor_za8_u8_vg4 (0, w16),
z28 = svreadz_hor_za8_u8_vg4 (0, w16))
+
+/*
+** readz_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** movaz {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ z28 = svreadz_hor_za8_mf8_vg4 (0, w16),
+ z28 = svreadz_hor_za8_mf8_vg4 (0, w16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme2p1"
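+
+/* The tests below exercise MOVAZ, which reads a ZA tile slice and zeroes
+   it in a single instruction (SME2.1).  As a usage sketch only -- the
+   helper name is hypothetical and not part of the test -- a streaming
+   function that shares ZA could read and clear a 128-bit vertical slice
+   like so, assuming <arm_sme.h>:
+
+     svint8_t
+     read_and_zero (uint32_t slice) __arm_streaming __arm_inout("za")
+     {
+       return svreadz_ver_za128_s8 (0, slice);
+     }
+*/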
+
+/*
+** readz_za128_s8_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_0_0, svint8_t,
+ z0 = svreadz_ver_za128_s8 (0, 0),
+ z0 = svreadz_ver_za128_s8 (0, 0))
+
+/*
+** readz_za128_s8_0_1:
+** mov (w1[2-5]), #?1
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_0_1, svint8_t,
+ z0 = svreadz_ver_za128_s8 (0, 1),
+ z0 = svreadz_ver_za128_s8 (0, 1))
+
+/*
+** readz_za128_s8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_0_w0, svint8_t,
+ z0 = svreadz_ver_za128_s8 (0, w0),
+ z0 = svreadz_ver_za128_s8 (0, w0))
+
+/*
+** readz_za128_s8_0_w0p1:
+** add (w1[2-5]), w0, #?1
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_0_w0p1, svint8_t,
+ z0 = svreadz_ver_za128_s8 (0, w0 + 1),
+ z0 = svreadz_ver_za128_s8 (0, w0 + 1))
+
+/*
+** readz_za128_s8_0_w0m1:
+** sub (w1[2-5]), w0, #?1
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_0_w0m1, svint8_t,
+ z0 = svreadz_ver_za128_s8 (0, w0 - 1),
+ z0 = svreadz_ver_za128_s8 (0, w0 - 1))
+
+/*
+** readz_za128_s8_1_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za1v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_1_w0, svint8_t,
+ z0 = svreadz_ver_za128_s8 (1, w0),
+ z0 = svreadz_ver_za128_s8 (1, w0))
+
+/*
+** readz_za128_s8_15_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za15v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s8_15_w0, svint8_t,
+ z0 = svreadz_ver_za128_s8 (15, w0),
+ z0 = svreadz_ver_za128_s8 (15, w0))
+
+/*
+** readz_za128_u8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_u8_0_w0, svuint8_t,
+ z0 = svreadz_ver_za128_u8 (0, w0),
+ z0 = svreadz_ver_za128_u8 (0, w0))
+
+/*
+** readz_za128_mf8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t,
+ z0 = svreadz_ver_za128_mf8 (0, w0),
+ z0 = svreadz_ver_za128_mf8 (0, w0))
+
+/*
+** readz_za128_s16_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s16_0_w0, svint16_t,
+ z0 = svreadz_ver_za128_s16 (0, w0),
+ z0 = svreadz_ver_za128_s16 (0, w0))
+
+/*
+** readz_za128_u16_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_u16_0_w0, svuint16_t,
+ z0 = svreadz_ver_za128_u16 (0, w0),
+ z0 = svreadz_ver_za128_u16 (0, w0))
+
+/*
+** readz_za128_f16_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_f16_0_w0, svfloat16_t,
+ z0 = svreadz_ver_za128_f16 (0, w0),
+ z0 = svreadz_ver_za128_f16 (0, w0))
+
+/*
+** readz_za128_bf16_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_bf16_0_w0, svbfloat16_t,
+ z0 = svreadz_ver_za128_bf16 (0, w0),
+ z0 = svreadz_ver_za128_bf16 (0, w0))
+
+/*
+** readz_za128_s32_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s32_0_w0, svint32_t,
+ z0 = svreadz_ver_za128_s32 (0, w0),
+ z0 = svreadz_ver_za128_s32 (0, w0))
+
+/*
+** readz_za128_u32_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_u32_0_w0, svuint32_t,
+ z0 = svreadz_ver_za128_u32 (0, w0),
+ z0 = svreadz_ver_za128_u32 (0, w0))
+
+/*
+** readz_za128_f32_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_f32_0_w0, svfloat32_t,
+ z0 = svreadz_ver_za128_f32 (0, w0),
+ z0 = svreadz_ver_za128_f32 (0, w0))
+
+/*
+** readz_za128_s64_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_s64_0_w0, svint64_t,
+ z0 = svreadz_ver_za128_s64 (0, w0),
+ z0 = svreadz_ver_za128_s64 (0, w0))
+
+/*
+** readz_za128_u64_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_u64_0_w0, svuint64_t,
+ z0 = svreadz_ver_za128_u64 (0, w0),
+ z0 = svreadz_ver_za128_u64 (0, w0))
+
+/*
+** readz_za128_f64_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.q, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za128_f64_0_w0, svfloat64_t,
+ z0 = svreadz_ver_za128_f64 (0, w0),
+ z0 = svreadz_ver_za128_f64 (0, w0))
TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t,
z0 = svreadz_ver_za8_u8 (0, w0),
z0 = svreadz_ver_za8_u8 (0, w0))
+
+/*
+** readz_za8_mf8_0_w0:
+** mov (w1[2-5]), w0
+** movaz z0\.b, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t,
+ z0 = svreadz_ver_za8_mf8 (0, w0),
+ z0 = svreadz_ver_za8_mf8 (0, w0))
z4 = svreadz_ver_za8_u8_vg2 (0, 1),
z4 = svreadz_ver_za8_u8_vg2 (0, 1))
+/*
+** readz_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t,
+ z4 = svreadz_ver_za8_mf8_vg2 (0, 1),
+ z4 = svreadz_ver_za8_mf8_vg2 (0, 1))
+
/*
** readz_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svreadz_ver_za8_u8_vg2 (0, w15),
z18 = svreadz_ver_za8_u8_vg2 (0, w15))
+/*
+** readz_za8_mf8_z18_0_w15:
+** movaz {z18\.b - z19\.b}, za0v\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ z18 = svreadz_ver_za8_mf8_vg2 (0, w15),
+ z18 = svreadz_ver_za8_mf8_vg2 (0, w15))
+
/*
** readz_za8_s8_z23_0_w12p14:
** movaz {[^\n]+}, za0v\.b\[w12, 14:15\]
z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1),
z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1))
+/*
+** readz_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1),
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1))
+
/*
** readz_za8_s8_z28_0_w12p2:
** movaz {z28\.b - z29\.b}, za0v\.b\[w12, 2:3\]
z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3),
z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3))
+/*
+** readz_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** movaz {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3),
+ z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3))
+
/*
** readz_za8_u8_z4_0_w15p12:
** movaz {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12),
z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12))
+/*
+** readz_za8_mf8_z4_0_w15p12:
+** movaz {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12),
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12))
+
/*
** readz_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15),
z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15))
+/*
+** readz_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** movaz {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15),
+ z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15))
+
/*
** readz_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1),
z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1))
+/*
+** readz_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** movaz {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1),
+ z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1))
+
/*
** readz_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t,
z18 = svreadz_ver_za8_u8_vg2 (0, w16),
z18 = svreadz_ver_za8_u8_vg2 (0, w16))
+
+/*
+** readz_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** movaz {z18\.b - z19\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ z18 = svreadz_ver_za8_mf8_vg2 (0, w16),
+ z18 = svreadz_ver_za8_mf8_vg2 (0, w16))
z4 = svreadz_ver_za8_u8_vg4 (0, 1),
z4 = svreadz_ver_za8_u8_vg4 (0, 1))
+/*
+** readz_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t,
+ z4 = svreadz_ver_za8_mf8_vg4 (0, 1),
+ z4 = svreadz_ver_za8_mf8_vg4 (0, 1))
+
/*
** readz_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
z18 = svreadz_ver_za8_u8_vg4 (0, w15),
z18 = svreadz_ver_za8_u8_vg4 (0, w15))
+/*
+** readz_za8_mf8_z18_0_w15:
+** movaz {[^\n]+}, za0v\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ z18 = svreadz_ver_za8_mf8_vg4 (0, w15),
+ z18 = svreadz_ver_za8_mf8_vg4 (0, w15))
+
/*
** readz_za8_s8_z23_0_w12p12:
** movaz {[^\n]+}, za0v\.b\[w12, 12:15\]
z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1),
z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1))
+/*
+** readz_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1),
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1))
+
/*
** readz_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3),
z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3))
+/*
+** readz_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** movaz {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3),
+ z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3))
+
/*
** readz_za8_u8_z0_0_w12p4:
** movaz {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
z0 = svreadz_ver_za8_u8_vg4 (0, w12 + 4),
z0 = svreadz_ver_za8_u8_vg4 (0, w12 + 4))
+/*
+** readz_za8_mf8_z0_0_w12p4:
+** movaz {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4),
+ z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4))
+
/*
** readz_za8_u8_z4_0_w15p12:
** movaz {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12),
z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12))
+/*
+** readz_za8_mf8_z4_0_w15p12:
+** movaz {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12),
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12))
+
/*
** readz_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14),
z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14))
+/*
+** readz_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** movaz {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14),
+ z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14))
+
/*
** readz_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1),
z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1))
+/*
+** readz_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** movaz {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1),
+ z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1))
+
/*
** readz_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t,
z28 = svreadz_ver_za8_u8_vg4 (0, w16),
z28 = svreadz_ver_za8_u8_vg4 (0, w16))
+
+/*
+** readz_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** movaz {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ z28 = svreadz_ver_za8_mf8_vg4 (0, w16),
+ z28 = svreadz_ver_za8_mf8_vg4 (0, w16))
z0 = svreadz_za8_u8_vg1x2 (w7),
z0 = svreadz_za8_u8_vg1x2 (w7))
+/*
+** readz_mf8_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w7_z0, svmfloat8x2_t,
+ z0 = svreadz_za8_mf8_vg1x2 (w7),
+ z0 = svreadz_za8_mf8_vg1x2 (w7))
+
/*
** readz_w8_z0:
** movaz {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
z0 = svreadz_za8_u8_vg1x2 (w12),
z0 = svreadz_za8_u8_vg1x2 (w12))
+/*
+** readz_mf8_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w12_z0, svmfloat8x2_t,
+ z0 = svreadz_za8_mf8_vg1x2 (w12),
+ z0 = svreadz_za8_mf8_vg1x2 (w12))
+
/*
** readz_w8p7_z0:
** movaz {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
z0 = svreadz_za8_u8_vg1x2 (w8 - 1),
z0 = svreadz_za8_u8_vg1x2 (w8 - 1))
+/*
+** readz_mf8_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** movaz {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8m1_z0, svmfloat8x2_t,
+ z0 = svreadz_za8_mf8_vg1x2 (w8 - 1),
+ z0 = svreadz_za8_mf8_vg1x2 (w8 - 1))
+
/*
** readz_w8_z18:
** movaz {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
z18 = svreadz_za8_u8_vg1x2 (w8),
z18 = svreadz_za8_u8_vg1x2 (w8))
+/*
+** readz_mf8_w8_z18:
+** movaz {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x2_t,
+ z18 = svreadz_za8_mf8_vg1x2 (w8),
+ z18 = svreadz_za8_mf8_vg1x2 (w8))
+
/* Leave the assembler to check for correctness for misaligned registers. */
/*
TEST_READ_ZA_XN (readz_w8_z28, svuint8x2_t,
z28 = svreadz_za8_u8_vg1x2 (w8),
z28 = svreadz_za8_u8_vg1x2 (w8))
+
+/*
+** readz_mf8_w8_z28:
+** movaz {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8_z28, svmfloat8x2_t,
+ z28 = svreadz_za8_mf8_vg1x2 (w8),
+ z28 = svreadz_za8_mf8_vg1x2 (w8))
z0 = svreadz_za8_u8_vg1x4 (w0),
z0 = svreadz_za8_u8_vg1x4 (w0))
+/*
+** readz_mf8_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** movaz {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w0_z0, svmfloat8x4_t,
+ z0 = svreadz_za8_mf8_vg1x4 (w0),
+ z0 = svreadz_za8_mf8_vg1x4 (w0))
+
/*
** readz_w7_z0:
** mov (w8|w9|w10|w11), w7
z0 = svreadz_za8_u8_vg1x4 (w11),
z0 = svreadz_za8_u8_vg1x4 (w11))
+/*
+** readz_mf8_w11_z0:
+** movaz {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w11_z0, svmfloat8x4_t,
+ z0 = svreadz_za8_mf8_vg1x4 (w11),
+ z0 = svreadz_za8_mf8_vg1x4 (w11))
+
/*
** readz_w12_z0:
** mov (w8|w9|w10|w11), w12
z0 = svreadz_za8_u8_vg1x4 (w8 + 8),
z0 = svreadz_za8_u8_vg1x4 (w8 + 8))
+/*
+** readz_mf8_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** movaz {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8p8_z0, svmfloat8x4_t,
+ z0 = svreadz_za8_mf8_vg1x4 (w8 + 8),
+ z0 = svreadz_za8_mf8_vg1x4 (w8 + 8))
+
/*
** readz_w8m1_z0:
** sub (w8|w9|w10|w11), w8, #?1
z18 = svreadz_za8_u8_vg1x4 (w8),
z18 = svreadz_za8_u8_vg1x4 (w8))
+/*
+** readz_mf8_w8_z18:
+** movaz [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x4_t,
+ z18 = svreadz_za8_mf8_vg1x4 (w8),
+ z18 = svreadz_za8_mf8_vg1x4 (w8))
+
/*
** readz_w8_z23:
** movaz [^\n]+, za\.d\[w8, 0, vgx4\]
z23 = svreadz_za8_u8_vg1x4 (w8),
z23 = svreadz_za8_u8_vg1x4 (w8))
+/*
+** readz_mf8_w8_z23:
+** movaz [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (readz_mf8_w8_z23, svmfloat8x4_t,
+ z23 = svreadz_za8_mf8_vg1x4 (w8),
+ z23 = svreadz_za8_mf8_vg1x4 (w8))
+
/*
** readz_w8_z28:
** movaz {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x2_t, z0,
+ svsel_mf8_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x2_t, z0,
+ svsel_mf8_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x2_t, z0,
+ svsel_mf8_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x2_t, z4,
+ svsel_mf8_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x2_t, z18,
+ svsel_mf8_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x2_t, z18,
+ svsel_mf8_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x2_t, z23,
+ svsel_mf8_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svmfloat8x2_t, z0,
+ svsel_mf8_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svmfloat8x2_t, z0,
+ svsel_mf8_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x4_t, z0,
+ svsel_mf8_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x4_t, z0,
+ svsel_mf8_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x4_t, z0,
+ svsel_mf8_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x4_t, z4,
+ svsel_mf8_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x4_t, z18,
+ svsel_mf8_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x4_t, z18,
+ svsel_mf8_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x4_t, z23,
+ svsel_mf8_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_mf8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_mf8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_mf8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_mf8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_mf8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_mf8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_mf8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_mf8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_mf8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ svst1_mf8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_mf8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_mf8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_mf8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_mf8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_mf8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ svst1_vnum_mf8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_mf8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_mf8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_mf8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_mf8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_mf8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_mf8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_mf8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_mf8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_mf8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_mf8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_mf8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ svst1_mf8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_mf8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_mf8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_mf8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_mf8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_mf8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_mf8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_mf8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ svst1_vnum_mf8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_mf8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_mf8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_mf8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_mf8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_mf8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_mf8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_mf8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_mf8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_mf8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_mf8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_mf8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_mf8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_mf8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_mf8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_mf8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_mf8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_mf8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_mf8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_mf8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_mf8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_mf8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_mf8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_mf8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_mf8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_mf8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_mf8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_mf8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_mf8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_mf8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_mf8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_mf8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_mf8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z0_z0, svmfloat8x2_t, z0,
+ svuzp_mf8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (uzp_z0_z4, svmfloat8x2_t, z0,
+ svuzp_mf8_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z4_z18, svmfloat8x2_t, z4,
+ svuzp_mf8_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (uzp_z18_z23, svmfloat8x2_t, z18,
+ svuzp_mf8_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svmfloat8x2_t, z23,
+ svuzp_mf8_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0, svmfloat8x2_t, z28,
+ svuzp_mf8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23: { xfail aarch64_big_endian }
+** uzp {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svmfloat8x2_t, z28,
+ svuzp_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svmfloat8x2_t, z28,
+ svuzp_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svmfloat8x4_t, z0,
+ svuzp_mf8_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svmfloat8x4_t, z0,
+ svuzp_mf8_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svmfloat8x4_t, z4,
+ svuzp_mf8_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svmfloat8x4_t, z18,
+ svuzp_mf8_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svmfloat8x4_t, z23,
+ svuzp_mf8_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svmfloat8x4_t, z28,
+ svuzp_mf8_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svmfloat8x2_t, z0,
+ svuzpq_mf8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svmfloat8x2_t, z0,
+ svuzpq_mf8_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svmfloat8x2_t, z4,
+ svuzpq_mf8_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svmfloat8x2_t, z18,
+ svuzpq_mf8_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svmfloat8x2_t, z23,
+ svuzpq_mf8_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svmfloat8x2_t, z28,
+ svuzpq_mf8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23: { xfail aarch64_big_endian }
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svmfloat8x2_t, z28,
+ svuzpq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svmfloat8x2_t, z28,
+ svuzpq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svmfloat8x4_t, z0,
+ svuzpq_mf8_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svmfloat8x4_t, z0,
+ svuzpq_mf8_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svmfloat8x4_t, z4,
+ svuzpq_mf8_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svmfloat8x4_t, z18,
+ svuzpq_mf8_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svmfloat8x4_t, z23,
+ svuzpq_mf8_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svmfloat8x4_t, z28,
+ svuzpq_mf8_x4 (z0),
+ svuzpq (z0))
svwrite_hor_za8_u8_vg2 (0, 1, z4),
svwrite_hor_za8_u8_vg2 (0, 1, z4))
+/*
+** write_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, 1, z4),
+ svwrite_hor_za8_mf8_vg2 (0, 1, z4))
+
/*
** write_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
svwrite_hor_za8_u8_vg2 (0, w15, z18),
svwrite_hor_za8_u8_vg2 (0, w15, z18))
+/*
+** write_za8_mf8_z18_0_w15:
+** mova za0h\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w15, z18),
+ svwrite_hor_za8_mf8_vg2 (0, w15, z18))
+
/*
** write_za8_s8_z23_0_w12p14:
** mov [^\n]+
svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4),
svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4))
+/*
+** write_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4),
+ svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4))
+
/*
** write_za8_s8_z28_0_w12p2:
** mova za0h\.b\[w12, 2:3\], {z28\.b - z29\.b}
svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0),
svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0))
+/*
+** write_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0),
+ svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0))
+
/*
** write_za8_u8_z4_0_w15p12:
** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b}
svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4),
svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4))
+/*
+** write_za8_mf8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4),
+ svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4))
+
/*
** write_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28),
svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28))
+/*
+** write_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28),
+ svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28))
+
/*
** write_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4),
svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4))
+/*
+** write_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4),
+ svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4))
+
/*
** write_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
svwrite_hor_za8_u8_vg2 (0, w16, z18),
svwrite_hor_za8_u8_vg2 (0, w16, z18))
+
+/*
+** write_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ svwrite_hor_za8_mf8_vg2 (0, w16, z18),
+ svwrite_hor_za8_mf8_vg2 (0, w16, z18))
svwrite_hor_za8_u8_vg4 (0, 1, z4),
svwrite_hor_za8_u8_vg4 (0, 1, z4))
+/*
+** write_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, 1, z4),
+ svwrite_hor_za8_mf8_vg4 (0, 1, z4))
+
/*
** write_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
svwrite_hor_za8_u8_vg4 (0, w15, z18),
svwrite_hor_za8_u8_vg4 (0, w15, z18))
+/*
+** write_za8_mf8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w15, z18),
+ svwrite_hor_za8_mf8_vg4 (0, w15, z18))
+
/*
** write_za8_s8_z23_0_w12p12:
** mov [^\n]+
svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4),
svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4))
+/*
+** write_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4),
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4))
+
/*
** write_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0),
svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0))
+/*
+** write_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0))
+
/*
** write_za8_u8_z0_0_w12p4:
** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b}
svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0),
svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0))
+/*
+** write_za8_mf8_z0_0_w12p4:
+** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0),
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0))
+
/*
** write_za8_u8_z4_0_w15p12:
** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b}
svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4),
svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4))
+/*
+** write_za8_mf8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4),
+ svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4))
+
/*
** write_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28),
svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28))
+/*
+** write_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28),
+ svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28))
+
/*
** write_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4),
svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4))
+/*
+** write_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4),
+ svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4))
+
/*
** write_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
svwrite_hor_za8_u8_vg4 (0, w16, z28),
svwrite_hor_za8_u8_vg4 (0, w16, z28))
+
+/*
+** write_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ svwrite_hor_za8_mf8_vg4 (0, w16, z28),
+ svwrite_hor_za8_mf8_vg4 (0, w16, z28))
svwrite_ver_za8_u8_vg2 (0, 1, z4),
svwrite_ver_za8_u8_vg2 (0, 1, z4))
+/*
+** write_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, 1, z4),
+ svwrite_ver_za8_mf8_vg2 (0, 1, z4))
+
/*
** write_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
svwrite_ver_za8_u8_vg2 (0, w15, z18),
svwrite_ver_za8_u8_vg2 (0, w15, z18))
+/*
+** write_za8_mf8_z18_0_w15:
+** mova za0v\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w15, z18),
+ svwrite_ver_za8_mf8_vg2 (0, w15, z18))
+
/*
** write_za8_s8_z23_0_w12p14:
** mov [^\n]+
svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4),
svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4))
+/*
+** write_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4),
+ svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4))
+
/*
** write_za8_s8_z28_0_w12p2:
** mova za0v\.b\[w12, 2:3\], {z28\.b - z29\.b}
svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0),
svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0))
+/*
+** write_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0),
+ svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0))
+
/*
** write_za8_u8_z4_0_w15p12:
** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b}
svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4),
svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4))
+/*
+** write_za8_mf8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4),
+ svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4))
+
/*
** write_za8_u8_z28_0_w12p15:
** add (w[0-9]+), w12, #?15
svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28),
svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28))
+/*
+** write_za8_mf8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28),
+ svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28))
+
/*
** write_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4),
svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4))
+/*
+** write_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4),
+ svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4))
+
/*
** write_za8_u8_z18_0_w16:
** mov (w1[2-5]), w16
TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
svwrite_ver_za8_u8_vg2 (0, w16, z18),
svwrite_ver_za8_u8_vg2 (0, w16, z18))
+
+/*
+** write_za8_mf8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t,
+ svwrite_ver_za8_mf8_vg2 (0, w16, z18),
+ svwrite_ver_za8_mf8_vg2 (0, w16, z18))
svwrite_ver_za8_u8_vg4 (0, 1, z4),
svwrite_ver_za8_u8_vg4 (0, 1, z4))
+/*
+** write_za8_mf8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, 1, z4),
+ svwrite_ver_za8_mf8_vg4 (0, 1, z4))
+
/*
** write_za8_s8_z28_0_w11:
** mov (w1[2-5]), w11
svwrite_ver_za8_u8_vg4 (0, w15, z18),
svwrite_ver_za8_u8_vg4 (0, w15, z18))
+/*
+** write_za8_mf8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w15, z18),
+ svwrite_ver_za8_mf8_vg4 (0, w15, z18))
+
/*
** write_za8_s8_z23_0_w12p12:
** mov [^\n]+
svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4),
svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4))
+/*
+** write_za8_mf8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4),
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4))
+
/*
** write_za8_s8_z28_0_w12p2:
** add (w[0-9]+), w12, #?2
svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0),
svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0))
+/*
+** write_za8_mf8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0))
+
/*
** write_za8_u8_z0_0_w12p4:
** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b}
svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0),
svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0))
+/*
+** write_za8_mf8_z0_0_w12p4:
+** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0),
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0))
+
/*
** write_za8_u8_z4_0_w15p12:
** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b}
svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4),
svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4))
+/*
+** write_za8_mf8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4),
+ svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4))
+
/*
** write_za8_u8_z28_0_w12p14:
** add (w[0-9]+), w12, #?14
svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28),
svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28))
+/*
+** write_za8_mf8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28),
+ svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28))
+
/*
** write_za8_s8_z0_0_w15p16:
** add (w[0-9]+), w15, #?16
svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4),
svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4))
+/*
+** write_za8_mf8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4),
+ svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4))
+
/*
** write_za8_u8_z28_0_w16:
** mov (w1[2-5]), w16
TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
svwrite_ver_za8_u8_vg4 (0, w16, z28),
svwrite_ver_za8_u8_vg4 (0, w16, z28))
+
+/*
+** write_za8_mf8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t,
+ svwrite_ver_za8_mf8_vg4 (0, w16, z28),
+ svwrite_ver_za8_mf8_vg4 (0, w16, z28))
svwrite_za8_u8_vg1x2 (w7, z0),
svwrite_za8_vg1x2 (w7, z0))
+/*
+** write_mf8_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w7_z0, svmfloat8x2_t,
+ svwrite_za8_mf8_vg1x2 (w7, z0),
+ svwrite_za8_vg1x2 (w7, z0))
+
/*
** write_w8_z0:
** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
svwrite_za8_u8_vg1x2 (w12, z0),
svwrite_za8_vg1x2 (w12, z0))
+/*
+** write_mf8_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w12_z0, svmfloat8x2_t,
+ svwrite_za8_mf8_vg1x2 (w12, z0),
+ svwrite_za8_vg1x2 (w12, z0))
+
/*
** write_w8p7_z0:
** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
svwrite_za8_u8_vg1x2 (w8 - 1, z0),
svwrite_za8_vg1x2 (w8 - 1, z0))
+/*
+** write_mf8_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8m1_z0, svmfloat8x2_t,
+ svwrite_za8_mf8_vg1x2 (w8 - 1, z0),
+ svwrite_za8_vg1x2 (w8 - 1, z0))
+
/*
** write_w8_z18:
** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
svwrite_za8_u8_vg1x2 (w8, z18),
svwrite_za8_vg1x2 (w8, z18))
+/*
+** write_mf8_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x2_t,
+ svwrite_za8_mf8_vg1x2 (w8, z18),
+ svwrite_za8_vg1x2 (w8, z18))
+
/* Leave the assembler to check for correctness for misaligned registers. */
/*
TEST_ZA_XN (write_w8_z28, svuint8x2_t,
svwrite_za8_u8_vg1x2 (w8, z28),
svwrite_za8_vg1x2 (w8, z28))
+
+/*
+** write_mf8_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8_z28, svmfloat8x2_t,
+ svwrite_za8_mf8_vg1x2 (w8, z28),
+ svwrite_za8_vg1x2 (w8, z28))
svwrite_za8_u8_vg1x4 (w0, z0),
svwrite_za8_vg1x4 (w0, z0))
+/*
+** write_mf8_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w0_z0, svmfloat8x4_t,
+ svwrite_za8_mf8_vg1x4 (w0, z0),
+ svwrite_za8_vg1x4 (w0, z0))
+
/*
** write_w7_z0:
** mov (w8|w9|w10|w11), w7
svwrite_za8_u8_vg1x4 (w11, z0),
svwrite_za8_vg1x4 (w11, z0))
+/*
+** write_mf8_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w11_z0, svmfloat8x4_t,
+ svwrite_za8_mf8_vg1x4 (w11, z0),
+ svwrite_za8_vg1x4 (w11, z0))
+
/*
** write_w12_z0:
svwrite_za8_u8_vg1x4 (w8 + 8, z0),
svwrite_za8_vg1x4 (w8 + 8, z0))
+/*
+** write_mf8_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8p8_z0, svmfloat8x4_t,
+ svwrite_za8_mf8_vg1x4 (w8 + 8, z0),
+ svwrite_za8_vg1x4 (w8 + 8, z0))
+
/*
** write_w8m1_z0:
** sub (w8|w9|w10|w11), w8, #?1
svwrite_za8_u8_vg1x4 (w8, z18),
svwrite_za8_vg1x4 (w8, z18))
+/*
+** write_mf8_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x4_t,
+ svwrite_za8_mf8_vg1x4 (w8, z18),
+ svwrite_za8_vg1x4 (w8, z18))
+
/*
** write_w8_z23:
** mov [^\n]+
svwrite_za8_u8_vg1x4 (w8, z23),
svwrite_za8_vg1x4 (w8, z23))
+/*
+** write_mf8_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_mf8_w8_z23, svmfloat8x4_t,
+ svwrite_za8_mf8_vg1x4 (w8, z23),
+ svwrite_za8_vg1x4 (w8, z23))
+
/*
** write_w8_z28:
** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z0_z0, svmfloat8x2_t, z0,
+ svzip_mf8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (zip_z0_z4, svmfloat8x2_t, z0,
+ svzip_mf8_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z4_z18, svmfloat8x2_t, z4,
+ svzip_mf8_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (zip_z18_z23, svmfloat8x2_t, z18,
+ svzip_mf8_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svmfloat8x2_t, z23,
+ svzip_mf8_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z28_z0, svmfloat8x2_t, z28,
+ svzip_mf8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23: { xfail aarch64_big_endian }
+** zip {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svmfloat8x2_t, z28,
+ svzip_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svmfloat8x2_t, z28,
+ svzip_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z0_z0, svmfloat8x4_t, z0,
+ svzip_mf8_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (zip_z0_z4, svmfloat8x4_t, z0,
+ svzip_mf8_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svmfloat8x4_t, z4,
+ svzip_mf8_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svmfloat8x4_t, z18,
+ svzip_mf8_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svmfloat8x4_t, z23,
+ svzip_mf8_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z28_z0, svmfloat8x4_t, z28,
+ svzip_mf8_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svmfloat8x2_t, z0,
+ svzipq_mf8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svmfloat8x2_t, z0,
+ svzipq_mf8_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svmfloat8x2_t, z4,
+ svzipq_mf8_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svmfloat8x2_t, z18,
+ svzipq_mf8_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svmfloat8x2_t, z23,
+ svzipq_mf8_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svmfloat8x2_t, z28,
+ svzipq_mf8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23: { xfail aarch64_big_endian }
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svmfloat8x2_t, z28,
+ svzipq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svmfloat8x2_t, z28,
+ svzipq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svmfloat8x4_t, z0,
+ svzipq_mf8_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svmfloat8x4_t, z0,
+ svzipq_mf8_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svmfloat8x4_t, z4,
+ svzipq_mf8_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svmfloat8x4_t, z18,
+ svzipq_mf8_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svmfloat8x4_t, z23,
+ svzipq_mf8_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svmfloat8x4_t, z28,
+ svzipq_mf8_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_mf8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_mf8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_mf8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_mf8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_mf8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_mf8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ z17 = svld1_mf8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ z22 = svld1_mf8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ z28 = svld1_mf8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_mf8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_mf8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_mf8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_mf8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_mf8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_mf8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_mf8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_mf8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_mf8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_mf8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_mf8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_mf8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_mf8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_mf8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svld1_mf8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svld1_mf8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svld1_mf8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_mf8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_mf8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_mf8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_mf8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_mf8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_mf8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_mf8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_mf8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld1_vnum_mf8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_mf8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_mf8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_mf8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_mf8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_mf8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_mf8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ z17 = svldnt1_mf8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ z22 = svldnt1_mf8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ z28 = svldnt1_mf8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_mf8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_mf8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_mf8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_mf8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_mf8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_mf8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_mf8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_mf8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_mf8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_mf8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_mf8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_mf8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_mf8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_mf8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_mf8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ z17 = svldnt1_mf8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ z22 = svldnt1_mf8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ z28 = svldnt1_mf8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_mf8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_mf8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_mf8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_mf8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_mf8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_mf8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_mf8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_mf8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_mf8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
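+/* A runtime vnum has to be scaled by the vector length, so the test
+   allows either a madd into a new base or a mul feeding the
+   register-offset form.  */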
+/*
+** ldnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+
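+/* REVD reverses the two 64-bit halves of each 128-bit quadword, so the
+   mf8 forms should map onto the same .q instruction as the other
+   element types.  */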
+/*
+** revd_mf8_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t,
+ z0 = svrevd_mf8_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_mf8_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_mf8_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t,
+ z0 = svrevd_mf8_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_mf8_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t,
+ z0 = svrevd_mf8_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
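+/* With _z predication the inactive lanes must be zero, hence the
+   initial zeroing move before the merging REVD.  */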
+/*
+** revd_mf8_z_untied:
+** movi? [vdz]0\.?(?:[0-9]*[bhsd])?, #?0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t,
+ z0 = svrevd_mf8_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_mf8_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t,
+ z0 = svrevd_mf8_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_mf8_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t,
+ z0 = svrevd_mf8_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_mf8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_mf8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_mf8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_mf8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_mf8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_mf8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
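+/* -18 is outside the immediate range of the two-register form
+   (multiples of 2 in [-16, 14]), so an addvl is expected to form the
+   address.  */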
+/*
+** stnt1_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
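+/* A two-register source must start at an even register, so a tuple
+   beginning at z17 is presumably copied into an aligned pair before
+   the store.  */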
+/*
+** stnt1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_mf8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_mf8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_mf8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,
+ svstnt1_mf8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_mf8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_mf8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_mf8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_mf8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_mf8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_mf8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_mf8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_mf8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_mf8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_mf8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_mf8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_mf8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_mf8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_mf8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_mf8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_mf8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_mf8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_mf8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_mf8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,
+ svstnt1_mf8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_mf8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_mf8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_mf8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_mf8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_mf8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_mf8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_mf8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))