return false;
auto stride_type = get_attr_stride_type (insn);
- return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
- || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
+ return (TARGET_STREAMING_SME2
+ && (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
+ || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE));
}
// Go through the constraints of INSN, which has already been extracted,
auto stride_type = get_attr_stride_type (insn);
rtx pat = PATTERN (insn);
rtx op;
- if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+ if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
op = SET_DEST (pat);
- else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
+ else if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
op = XVECEXP (SET_SRC (pat), 0, 1);
else
return false;
DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
-DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, AARCH64_FL_SME2)
DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none)
DEF_SVE_FUNCTION (svbfmlslt, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlslt_lane, ternary_bfloat_lane, s_float, none)
DEF_SVE_FUNCTION (svclamp, clamp, all_float, none)
-DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
DEF_SVE_FUNCTION (svcntp, count_pred_c, all_count, none)
-DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
-DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
-DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, s_narrow_fsu, none)
DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, s_narrow_fsu, none)
DEF_SVE_FUNCTION_GS (svld1, load, all_data, x24, implicit)
DEF_SVE_FUNCTION_GS (svldnt1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svpext_lane, extract_pred, all_count, x12, none)
+DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
+DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
+DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, x24, none)
DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none)
DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none)
DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, all_float, x24, none)
-DEF_SVE_FUNCTION_GS (svpext_lane, extract_pred, all_count, x12, none)
-DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x4, x4, none)
DEF_SVE_FUNCTION_GS (svqdmulh, binary_opt_single_n, all_signed, x24, none)
DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x4, x4, none)
DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x4, x4, none)
DEF_SVE_FUNCTION_GS (svrinta, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrintm, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrintp, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrshl, binary_int_opt_single_n, all_integer, x24, none)
DEF_SVE_FUNCTION_GS (svsel, binaryxn, all_data, x24, implicit)
-DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
-DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
DEF_SVE_FUNCTION_GS (svunpk, unary_convertxn, bhs_widen, x24, none)
DEF_SVE_FUNCTION_GS (svuzp, unaryxn, all_data, x24, none)
DEF_SVE_FUNCTION_GS (svuzpq, unaryxn, all_data, x24, none)
-DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none)
DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none)
#undef REQUIRED_EXTENSIONS
(match_operand:SVE_FULL_SDI 4 "register_operand")))]
"TARGET_SVE
&& (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
- || (TARGET_STREAMING_SME2
+ || (TARGET_SVE2p1_OR_SME2
&& <SVE_FULL_SDI:elem_bits> == 32
&& <SVE_FULL_BHI:elem_bits> == 16))"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
;; - BFDOT (BF16)
;; - BFMLALB (BF16)
;; - BFMLALT (BF16)
-;; - BFMLSLB (SME2)
-;; - BFMLSLT (SME2)
+;; - BFMLSLB (SVE2p1, SME2)
+;; - BFMLSLT (SVE2p1, SME2)
;; - BFMMLA (BF16)
;; -------------------------------------------------------------------------
(match_operand:VNx8BF 2 "register_operand")
(match_operand:VNx8BF 3 "register_operand")]
SVE_BFLOAT_TERNARY_LONG))]
- "TARGET_SVE_BF16"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
(match_operand:VNx8BF 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
SVE_BFLOAT_TERNARY_LONG_LANE))]
- "TARGET_SVE_BF16"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
[(match_operand:VNx16BI 2 "register_operand" "Uph")
(match_operand:SVE_FULLx24 1 "memory_operand" "m")]
LD1_COUNT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"<optab><Vesize>\t%0, %K2/z, %1"
[(set_attr "stride_type" "ld1_consecutive")]
)
(match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
(match_dup 0)]
ST1_COUNT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"<optab><Vesize>\t%1, %K2, %0"
[(set_attr "stride_type" "st1_consecutive")]
)
(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
[(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
(unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"ptrue\t%K0.<bits_etype>"
)
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"pext\t%0.<bits_etype>, %K1[%2]"
)
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXTx2))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
)
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_CNTP_C))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"cntp\t%x0, %K1.<bits_etype>, vlx%2"
)
UNSPEC_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_FMINNM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
UNSPEC_COND_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_COND_FMINNM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
(match_operand:VNx8HI 2 "register_operand")]
DOTPROD)
(match_operand:VNx4SI 3 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
(match_operand:VNx8HF 2 "register_operand")]
UNSPEC_FDOT)
(match_operand:VNx4SF 3 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] fdot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
UNSPEC_SVE_LANE_SELECT)]
UNSPEC_FDOT)
(match_operand:VNx4SF 4 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
[ w , w , y , 0 ; * ] fdot\t%0.s, %1.h, %2.h[%3]
[ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
(unspec:VNx8HI_ONLY
[(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
SVE_QCVTxN))]
- "TARGET_STREAMING_SME2"
+ ""
"<optab>\t%0.h, %1"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-vector narrowing right shifts
;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQRSHR
+;; - SQRSHRN
+;; - SQRSHRU
+;; - SQRSHRUN
+;; - UQRSHR
+;; - UQRSHRN
+;; -------------------------------------------------------------------------
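+;; Only the two-register .s -> .h forms of SQRSHRN, SQRSHRUN and UQRSHRN
+;; are available outside streaming mode (as part of SVE2.1); all other
+;; mode and operation combinations require streaming SME2.  The per-unspec
+;; conditions are carried by the SVE2_INT_SHIFT_IMM_NARROWxN iterator.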
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWxN))]
- "TARGET_STREAMING_SME2"
+ "(<MODE>mode == VNx8SImode || TARGET_STREAMING_SME2)"
"<sve_int_op>\t%0.<Ventype>, %1, #%2"
)
(const_int BHSD_BITS)]
SVE_WHILE_ORDER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
)
(match_operand:DI 3 "const_int_operand")]
SVE_WHILE_ORDER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
)
functions, since streaming mode itself implies SME. */
#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING)
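+
+/* True if the instructions that SVE2.1 shares with SME2 are available:
+   non-streaming code requires SVE2.1 and streaming code requires SME2.  */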
+#define TARGET_SVE2p1_OR_SME2 \
+ ((TARGET_SVE2p1 || TARGET_STREAMING) \
+ && (TARGET_SME2 || TARGET_NON_STREAMING))
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
UNSPEC_FMIN UNSPEC_FMINNM])
(define_int_iterator SVE_BFLOAT_TERNARY_LONG
- [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT
- (UNSPEC_BFMLSLB "TARGET_STREAMING_SME2")
- (UNSPEC_BFMLSLT "TARGET_STREAMING_SME2")
- (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
+ [(UNSPEC_BFDOT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALB "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLSLB "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMLSLT "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMMLA "TARGET_SVE_BF16 && TARGET_NON_STREAMING")])
(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE
- [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT
- (UNSPEC_BFMLSLB "TARGET_STREAMING_SME2")
- (UNSPEC_BFMLSLT "TARGET_STREAMING_SME2")])
+ [(UNSPEC_BFDOT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALB "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLSLB "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMLSLT "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
UNSPEC_IORV
UNSPEC_UQRSHRNT
UNSPEC_UQSHRNT])
-(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN [UNSPEC_SQRSHR
- UNSPEC_SQRSHRN
- UNSPEC_SQRSHRU
- UNSPEC_SQRSHRUN
- UNSPEC_UQRSHR
- UNSPEC_UQRSHRN])
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN
+ [(UNSPEC_SQRSHR "TARGET_STREAMING_SME2")
+ (UNSPEC_SQRSHRN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_SQRSHRU "TARGET_STREAMING_SME2")
+ (UNSPEC_SQRSHRUN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_UQRSHR "TARGET_STREAMING_SME2")
+ (UNSPEC_UQRSHRN "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI])
(define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR])
-(define_int_iterator SVE_QCVTxN [UNSPEC_SQCVT UNSPEC_SQCVTN
- UNSPEC_SQCVTU UNSPEC_SQCVTUN
- UNSPEC_UQCVT UNSPEC_UQCVTN])
+(define_int_iterator SVE_QCVTxN [(UNSPEC_SQCVT "TARGET_STREAMING_SME2")
+ (UNSPEC_SQCVTN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_SQCVTU "TARGET_STREAMING_SME2")
+ (UNSPEC_SQCVTUN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_UQCVT "TARGET_STREAMING_SME2")
+ (UNSPEC_UQCVTN "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE2_SFx24_UNARY [UNSPEC_FRINTA UNSPEC_FRINTM
UNSPEC_FRINTN UNSPEC_FRINTP])
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE) \
+ TYPE \
+ tied1_##TYPE(TYPE a, TYPE b, TYPE c) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c); \
+ } \
+ \
+ TYPE \
+ tied2_##TYPE(TYPE a, TYPE b, TYPE c) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE) \
+ TYPE \
+ untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, c), d); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslb_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_f32 (z0, z4, z5),
+ z0 = svbfmlslb (z0, z4, z5))
+
+/*
+** bfmlslb_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_f32 (z4, z0, z1),
+ z0_res = svbfmlslb (z4, z0, z1))
+
+/*
+** bfmlslb_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_f32 (z4, z1, z0),
+ z0_res = svbfmlslb (z4, z1, z0))
+
+/*
+** bfmlslb_f32_untied:
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_f32 (z1, z4, z5),
+ z0 = svbfmlslb (z1, z4, z5))
+
+/*
+** bfmlslb_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlslb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z0, z4, d7),
+ z0 = svbfmlslb (z0, z4, d7))
+
+/*
+** bfmlslb_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z1, z4, d7),
+ z0 = svbfmlslb (z1, z4, d7))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslb_lane_0_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslb_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslb_lane_1_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslb_lane_7_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslb_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslb_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslb_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslt_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z0, z4, z5),
+ z0 = svbfmlslt (z0, z4, z5))
+
+/*
+** bfmlslt_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z0, z1),
+ z0_res = svbfmlslt (z4, z0, z1))
+
+/*
+** bfmlslt_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z1, z0),
+ z0_res = svbfmlslt (z4, z1, z0))
+
+/*
+** bfmlslt_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z1, z4, z5),
+ z0 = svbfmlslt (z1, z4, z5))
+
+/*
+** bfmlslt_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z0, z4, d7),
+ z0 = svbfmlslt (z0, z4, d7))
+
+/*
+** bfmlslt_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z1, z4, d7),
+ z0 = svbfmlslt (z1, z4, d7))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslt_lane_0_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslt_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslt_lane_1_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslt_lane_7_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslt_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslt_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslt_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f16_tied1:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied1, svfloat16_t,
+ z0 = svclamp_f16 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f16_tied2:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied2, svfloat16_t,
+ z0 = svclamp_f16 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f16_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, \1\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied3, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f16_untied:
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, z3\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_untied, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f32_tied1:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied1, svfloat32_t,
+ z0 = svclamp_f32 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f32_tied2:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied2, svfloat32_t,
+ z0 = svclamp_f32 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied3, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f32_untied:
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, z3\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_untied, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f64_tied1:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied1, svfloat64_t,
+ z0 = svclamp_f64 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f64_tied2:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied2, svfloat64_t,
+ z0 = svclamp_f64 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f64_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, \1\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied3, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f64_untied:
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, z3\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_untied, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c16 (pn0, 2),
+ x0 = svcntp_c16 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c16 (pn7, 4),
+ x15 = svcntp_c16 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c16 (pn8, 2),
+ x17 = svcntp_c16 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c16 (pn15, 4),
+ x0 = svcntp_c16 (pn15, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c32 (pn0, 2),
+ x0 = svcntp_c32 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c32 (pn7, 4),
+ x15 = svcntp_c32 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c32 (pn8, 2),
+ x17 = svcntp_c32 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c32 (pn15, 4),
+ x0 = svcntp_c32 (pn15, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c64 (pn0, 2),
+ x0 = svcntp_c64 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c64 (pn7, 4),
+ x15 = svcntp_c64 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c64 (pn8, 2),
+ x17 = svcntp_c64 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c64 (pn15, 4),
+ x0 = svcntp_c64 (pn15, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c8 (pn0, 2),
+ x0 = svcntp_c8 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c8 (pn7, 4),
+ x15 = svcntp_c8 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c8 (pn8, 2),
+ x17 = svcntp_c8 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c8 (pn15, 4),
+ x0 = svcntp_c8 (pn15, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_f32:
+** fdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_f32:
+** fdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_f32:
+** fdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svfloat16_t, z8,
+ z0 = svdot_lane_f32_f16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_f32, svfloat32_t, svfloat16_t, z16,
+ z0 = svdot_lane_f32_f16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_s32:
+** sdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_s32:
+** sdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_s32:
+** sdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_s32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint16_t, z8,
+ z0 = svdot_lane_s32_s16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_s32:
+** mov (z[0-7])\.d, z16\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint16_t, z16,
+ z0 = svdot_lane_s32_s16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_u32:
+** udot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_u32:
+** udot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_u32:
+** udot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_u32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint16_t, z8,
+ z0 = svdot_lane_u32_u16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_u32:
+** mov (z[0-7])\.d, z16\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint16_t, z16,
+ z0 = svdot_lane_u32_u16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svld1_bf16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svld1_bf16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svld1_bf16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svld1_bf16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svld1_bf16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svld1_bf16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svld1_f16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svld1_f16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svld1_f16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svld1_f16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svld1_f16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svld1_f16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svld1_f32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svld1_f32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svld1_f32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svld1_f32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svld1_f32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svld1_f32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svld1_f64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svld1_f64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svld1_f64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svld1_f64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svld1_f64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svld1_f64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x2_t, int16_t,
+ z17 = svld1_s16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x2_t, int16_t,
+ z22 = svld1_s16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x2_t, int16_t,
+ z28 = svld1_s16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x4_t, int16_t,
+ z17 = svld1_s16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x4_t, int16_t,
+ z22 = svld1_s16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x4_t, int16_t,
+ z28 = svld1_s16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x2_t, int32_t,
+ z17 = svld1_s32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x2_t, int32_t,
+ z22 = svld1_s32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x2_t, int32_t,
+ z28 = svld1_s32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x4_t, int32_t,
+ z17 = svld1_s32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x4_t, int32_t,
+ z22 = svld1_s32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x4_t, int32_t,
+ z28 = svld1_s32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x2_t, int64_t,
+ z17 = svld1_s64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x2_t, int64_t,
+ z22 = svld1_s64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x2_t, int64_t,
+ z28 = svld1_s64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x4_t, int64_t,
+ z17 = svld1_s64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x4_t, int64_t,
+ z22 = svld1_s64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x4_t, int64_t,
+ z28 = svld1_s64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x2_t, int8_t,
+ z17 = svld1_s8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x2_t, int8_t,
+ z22 = svld1_s8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x2_t, int8_t,
+ z28 = svld1_s8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x4_t, int8_t,
+ z17 = svld1_s8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x4_t, int8_t,
+ z22 = svld1_s8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x4_t, int8_t,
+ z28 = svld1_s8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svld1_u16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svld1_u16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svld1_u16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svld1_u16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svld1_u16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svld1_u16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svld1_u32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svld1_u32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svld1_u32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svld1_u32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svld1_u32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svld1_u32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svld1_u64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svld1_u64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svld1_u64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svld1_u64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svld1_u64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svld1_u64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svld1_u8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svld1_u8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svld1_u8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svld1_u8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svld1_u8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svld1_u8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svldnt1_bf16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svldnt1_bf16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svldnt1_bf16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svldnt1_bf16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svldnt1_bf16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svldnt1_bf16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svldnt1_f16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svldnt1_f16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svldnt1_f16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svldnt1_f16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svldnt1_f16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svldnt1_f16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svldnt1_f32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svldnt1_f32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svldnt1_f32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svldnt1_f32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svldnt1_f32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svldnt1_f32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svldnt1_f64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svldnt1_f64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svldnt1_f64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svldnt1_f64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svldnt1_f64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svldnt1_f64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x2_t, int16_t,
+ z17 = svldnt1_s16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x2_t, int16_t,
+ z22 = svldnt1_s16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x2_t, int16_t,
+ z28 = svldnt1_s16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x4_t, int16_t,
+ z17 = svldnt1_s16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x4_t, int16_t,
+ z22 = svldnt1_s16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x4_t, int16_t,
+ z28 = svldnt1_s16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x2_t, int32_t,
+ z17 = svldnt1_s32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x2_t, int32_t,
+ z22 = svldnt1_s32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x2_t, int32_t,
+ z28 = svldnt1_s32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x4_t, int32_t,
+ z17 = svldnt1_s32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x4_t, int32_t,
+ z22 = svldnt1_s32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x4_t, int32_t,
+ z28 = svldnt1_s32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x2_t, int64_t,
+ z17 = svldnt1_s64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x2_t, int64_t,
+ z22 = svldnt1_s64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x2_t, int64_t,
+ z28 = svldnt1_s64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x4_t, int64_t,
+ z17 = svldnt1_s64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x4_t, int64_t,
+ z22 = svldnt1_s64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x4_t, int64_t,
+ z28 = svldnt1_s64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x2_t, int8_t,
+ z17 = svldnt1_s8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x2_t, int8_t,
+ z22 = svldnt1_s8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x2_t, int8_t,
+ z28 = svldnt1_s8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x4_t, int8_t,
+ z17 = svldnt1_s8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x4_t, int8_t,
+ z22 = svldnt1_s8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x4_t, int8_t,
+ z28 = svldnt1_s8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svldnt1_u16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svldnt1_u16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svldnt1_u16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svldnt1_u16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svldnt1_u16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svldnt1_u16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svldnt1_u32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svldnt1_u32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svldnt1_u32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svldnt1_u32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svldnt1_u32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svldnt1_u32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svldnt1_u64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svldnt1_u64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svldnt1_u64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svldnt1_u64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svldnt1_u64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svldnt1_u64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svldnt1_u8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svldnt1_u8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svldnt1_u8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svldnt1_u8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svldnt1_u8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svldnt1_u8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.h, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c16 (pn0, 0),
+ p2 = svpext_lane_c16 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.h, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c16 (pn7, 1),
+ p5 = svpext_lane_c16 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.h, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c16 (pn8, 2),
+ p9 = svpext_lane_c16 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.h, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c16 (pn11, 3),
+ p12 = svpext_lane_c16 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.h, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c16 (pn15, 0),
+ p2 = svpext_lane_c16 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.h, p3\.h}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c16_x2 (pn0, 0),
+ p2 = svpext_lane_c16_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c16_x2 (pn7, 1),
+ p5 = svpext_lane_c16_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c16_x2 (pn8, 0),
+ p9 = svpext_lane_c16_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.h, p13\.h}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c16_x2 (pn11, 1),
+ p12 = svpext_lane_c16_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.h, p3\.h}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c16_x2 (pn15, 0),
+ p2 = svpext_lane_c16_x2 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.s, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c32 (pn0, 0),
+ p2 = svpext_lane_c32 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.s, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c32 (pn7, 1),
+ p5 = svpext_lane_c32 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.s, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c32 (pn8, 2),
+ p9 = svpext_lane_c32 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.s, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c32 (pn11, 3),
+ p12 = svpext_lane_c32 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.s, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c32 (pn15, 0),
+ p2 = svpext_lane_c32 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.s, p3\.s}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c32_x2 (pn0, 0),
+ p2 = svpext_lane_c32_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c32_x2 (pn7, 1),
+ p5 = svpext_lane_c32_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c32_x2 (pn8, 0),
+ p9 = svpext_lane_c32_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.s, p13\.s}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c32_x2 (pn11, 1),
+ p12 = svpext_lane_c32_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.s, p3\.s}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c32_x2 (pn15, 0),
+ p2 = svpext_lane_c32_x2 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.d, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c64 (pn0, 0),
+ p2 = svpext_lane_c64 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.d, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c64 (pn7, 1),
+ p5 = svpext_lane_c64 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.d, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c64 (pn8, 2),
+ p9 = svpext_lane_c64 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.d, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c64 (pn11, 3),
+ p12 = svpext_lane_c64 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.d, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c64 (pn15, 0),
+ p2 = svpext_lane_c64 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.d, p3\.d}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c64_x2 (pn0, 0),
+ p2 = svpext_lane_c64_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c64_x2 (pn7, 1),
+ p5 = svpext_lane_c64_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c64_x2 (pn8, 0),
+ p9 = svpext_lane_c64_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.d, p13\.d}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c64_x2 (pn11, 1),
+ p12 = svpext_lane_c64_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.d, p3\.d}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c64_x2 (pn15, 0),
+ p2 = svpext_lane_c64_x2 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.b, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c8 (pn0, 0),
+ p2 = svpext_lane_c8 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.b, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c8 (pn7, 1),
+ p5 = svpext_lane_c8 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.b, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c8 (pn8, 2),
+ p9 = svpext_lane_c8 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.b, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c8 (pn11, 3),
+ p12 = svpext_lane_c8 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.b, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c8 (pn15, 0),
+ p2 = svpext_lane_c8 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.b, p3\.b}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c8_x2 (pn0, 0),
+ p2 = svpext_lane_c8_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c8_x2 (pn7, 1),
+ p5 = svpext_lane_c8_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c8_x2 (pn8, 0),
+ p9 = svpext_lane_c8_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.b, p13\.b}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c8_x2 (pn11, 1),
+ p12 = svpext_lane_c8_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.b, p3\.b}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c8_x2 (pn15, 0),
+ p2 = svpext_lane_c8_x2 (pn15, 0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c16 (),
+ pn0 = svptrue_c16 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c16 (),
+ pn7 = svptrue_c16 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.h
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c16 (),
+ pn8 = svptrue_c16 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.h
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c16 (),
+ pn15 = svptrue_c16 ())
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c32 (),
+ pn0 = svptrue_c32 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c32 (),
+ pn7 = svptrue_c32 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.s
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c32 (),
+ pn8 = svptrue_c32 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.s
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c32 (),
+ pn15 = svptrue_c32 ())
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c64 (),
+ pn0 = svptrue_c64 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c64 (),
+ pn7 = svptrue_c64 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.d
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c64 (),
+ pn8 = svptrue_c64 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.d
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c64 (),
+ pn15 = svptrue_c64 ())
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c8 (),
+ pn0 = svptrue_c8 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c8 (),
+ pn7 = svptrue_c8 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.b
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c8 (),
+ pn8 = svptrue_c8 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.b
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c8 (),
+ pn15 = svptrue_c8 ())
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** sqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z0),
+ z0_res = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z6),
+ z0_res = svqcvtn_s16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z29),
+ z0_res = svqcvtn_s16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svint16_t,
+ z5 = svqcvtn_s16_s32_x2 (z0),
+ z5 = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svint16_t,
+ z22 = svqcvtn_s16_s32_x2 (z16),
+ z22 = svqcvtn_s16 (z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** sqcvtun z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtun z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtun z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_s32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtun z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_s32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** uqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** uqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** uqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svuint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_u32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** uqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svuint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_u32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrn_z0_z0_1:
+** sqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z0, 1),
+ z0_res = svqrshrn_s16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** sqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z6, 16),
+ z0_res = svqrshrn_s16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z29, 13),
+ z0_res = svqrshrn_s16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** sqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svint32x2_t, svint16_t,
+ z5 = svqrshrn_n_s16_s32_x2 (z0, 11),
+ z5 = svqrshrn_s16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** sqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svint32x2_t, svint16_t,
+ z22 = svqrshrn_n_s16_s32_x2 (z16, 15),
+ z22 = svqrshrn_s16 (z16, 15))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrn_z0_z0_1:
+** uqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z0, 1),
+ z0_res = svqrshrn_u16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** uqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z6, 16),
+ z0_res = svqrshrn_u16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z29, 13),
+ z0_res = svqrshrn_u16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** uqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svuint32x2_t, svuint16_t,
+ z5 = svqrshrn_n_u16_u32_x2 (z0, 11),
+ z5 = svqrshrn_u16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** uqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svuint32x2_t, svuint16_t,
+ z22 = svqrshrn_n_u16_u32_x2 (z16, 15),
+ z22 = svqrshrn_u16 (z16, 15))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrun_z0_z0_1:
+** sqrshrun z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z0_1, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z0, 1),
+ z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z6_16:
+** sqrshrun z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z6_16, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z6, 16),
+ z0_res = svqrshrun_u16 (z6, 16))
+
+/*
+** qrshrun_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z29_13, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z29, 13),
+ z0_res = svqrshrun_u16 (z29, 13))
+
+/*
+** qrshrun_z5_z0_11:
+** sqrshrun z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z5_z0_11, svint32x2_t, svuint16_t,
+ z5 = svqrshrun_n_u16_s32_x2 (z0, 11),
+ z5 = svqrshrun_u16 (z0, 11))
+
+/*
+** qrshrun_z22_z16_15:
+** sqrshrun z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z22_z16_15, svint32x2_t, svuint16_t,
+ z22 = svqrshrun_n_u16_s32_x2 (z16, 15),
+ z22 = svqrshrun_u16 (z16, 15))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_14, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m18, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m36, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_14, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m18, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m36, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_14, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m18, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m36, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_14, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m18, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_14, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m18, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m36, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_28, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m36, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_14, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m18, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_14, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m18, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m36, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_28, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m36, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_14, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m18, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_14, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m18, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m36, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_28, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m36, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_14, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m18, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_14, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m18, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m36, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_28, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m36, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_14, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m18, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m36, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_14, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m18, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m36, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_14, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m18, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m36, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_14, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m18, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
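+
+/* Contiguous ST1B stores of uint8_t four-vector tuples, via svst1_u8_x4
+   and svst1_vnum_u8_x4: base and register-index addressing, immediate
+   offsets inside and outside the supported "#<imm>, mul vl" range,
+   non-default tuple registers (z17, z22, z28) and the predicate-as-counter
+   operands pn0, pn7 and pn15.  */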
+
+/*
+** st1_u8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m36, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
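+
+/* Non-temporal STNT1H stores of bfloat16 two-vector tuples, via
+   svstnt1_bf16_x2 and svstnt1_vnum_bf16_x2: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */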
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
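+
+/* Non-temporal STNT1H stores of bfloat16 four-vector tuples, via
+   svstnt1_bf16_x4 and svstnt1_vnum_bf16_x4: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */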
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
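+
+/* Non-temporal STNT1H stores of float16_t two-vector tuples, via
+   svstnt1_f16_x2 and svstnt1_vnum_f16_x2: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */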
+
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
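+
+/* Non-temporal STNT1H stores of float16_t four-vector tuples, via
+   svstnt1_f16_x4 and svstnt1_vnum_f16_x4: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */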
+
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
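+
+/* Non-temporal STNT1W stores of float32_t two-vector tuples, via
+   svstnt1_f32_x2 and svstnt1_vnum_f32_x2: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */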
+
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
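+
+/* Non-temporal STNT1W stores of float32_t four-vector tuples, via
+   svstnt1_f32_x4 and svstnt1_vnum_f32_x4: base and register-index
+   addressing, in-range and out-of-range immediate offsets, non-default
+   tuple registers and the pn0/pn7/pn15 predicate operands.  */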
+
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_14, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m18, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_14, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m36, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_28, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_14, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m18, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_14, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m36, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_28, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_14, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m18, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_14, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m36, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_28, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_14, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m18, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_14, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m36, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_28, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b16_s64_x2 (x0, x1),
+ p1 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b16_s64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b16_s64_x2 (x0, x1),
+ p9 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b16_s64_x2 (x0, x1),
+ p14 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b16_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b16_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b16_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b16_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b16_u64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b16_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b16_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b32_s64_x2 (x0, x1),
+ p1 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b32_s64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b32_s64_x2 (x0, x1),
+ p9 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b32_s64_x2 (x0, x1),
+ p14 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b32_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b32_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b32_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b32_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b32_u64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b32_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b32_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b64_s64_x2 (x0, x1),
+ p1 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b64_s64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b64_s64_x2 (x0, x1),
+ p9 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b64_s64_x2 (x0, x1),
+ p14 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b64_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b64_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b64_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b64_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b64_u64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b64_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b64_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b8_s64_x2 (x0, x1),
+ p1 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b8_s64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b8_s64_x2 (x0, x1),
+ p9 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b8_s64_x2 (x0, x1),
+ p14 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b8_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b8_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b8_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b8_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b8_u64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b8_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b8_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c16_s64 (x0, x1, 2),
+ pn0 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c16_s64 (x0, x1, 4),
+ pn7 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c16_s64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c16_s64 (x0, x1, 4),
+ pn15 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c16_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c16_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c16_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c16_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c16_u64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c16_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c16_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c32_s64 (x0, x1, 2),
+ pn0 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c32_s64 (x0, x1, 4),
+ pn7 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c32_s64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c32_s64 (x0, x1, 4),
+ pn15 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c32_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c32_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c32_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c32_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c32_u64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c32_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c32_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c64_s64 (x0, x1, 2),
+ pn0 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c64_s64 (x0, x1, 4),
+ pn7 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c64_s64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c64_s64 (x0, x1, 4),
+ pn15 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c64_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c64_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c64_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c64_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c64_u64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c64_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c64_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c8_s64 (x0, x1, 2),
+ pn0 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c8_s64 (x0, x1, 4),
+ pn7 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c8_s64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c8_s64 (x0, x1, 4),
+ pn15 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c8_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c8_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c8_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c8_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c8_u64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c8_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c8_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b16_s64_x2 (x0, x1),
+ p1 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b16_s64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b16_s64_x2 (x0, x1),
+ p9 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b16_s64_x2 (x0, x1),
+ p14 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b16_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b16_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b16_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b16_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b16_u64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b16_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b16_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b32_s64_x2 (x0, x1),
+ p1 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b32_s64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b32_s64_x2 (x0, x1),
+ p9 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b32_s64_x2 (x0, x1),
+ p14 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b32_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b32_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b32_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b32_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b32_u64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b32_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b32_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b64_s64_x2 (x0, x1),
+ p1 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b64_s64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b64_s64_x2 (x0, x1),
+ p9 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b64_s64_x2 (x0, x1),
+ p14 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b64_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b64_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b64_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b64_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b64_u64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b64_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b64_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b8_s64_x2 (x0, x1),
+ p1 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b8_s64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b8_s64_x2 (x0, x1),
+ p9 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b8_s64_x2 (x0, x1),
+ p14 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b8_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b8_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b8_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b8_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b8_u64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b8_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b8_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c16_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c16_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c16_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c16_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c16_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c16_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c32_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c32_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c32_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c32_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c32_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c32_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c64_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c64_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c64_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c64_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c64_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c64_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c8_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c8_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c8_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c8_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c8_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c8_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b16_s64_x2 (x0, x1),
+ p1 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b16_s64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b16_s64_x2 (x0, x1),
+ p9 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b16_s64_x2 (x0, x1),
+ p14 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b16_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b16_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b16_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b16_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b16_u64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b16_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b16_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b32_s64_x2 (x0, x1),
+ p1 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b32_s64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b32_s64_x2 (x0, x1),
+ p9 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b32_s64_x2 (x0, x1),
+ p14 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b32_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b32_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b32_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b32_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b32_u64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b32_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b32_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b64_s64_x2 (x0, x1),
+ p1 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b64_s64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b64_s64_x2 (x0, x1),
+ p9 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b64_s64_x2 (x0, x1),
+ p14 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b64_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b64_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b64_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b64_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b64_u64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b64_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b64_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b8_s64_x2 (x0, x1),
+ p1 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b8_s64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b8_s64_x2 (x0, x1),
+ p9 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b8_s64_x2 (x0, x1),
+ p14 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b8_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b8_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b8_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b8_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b8_u64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b8_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b8_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c16_s64 (x0, x1, 2),
+ pn0 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c16_s64 (x0, x1, 4),
+ pn7 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c16_s64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c16_s64 (x0, x1, 4),
+ pn15 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c16_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c16_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c16_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c16_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c16_u64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c16_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c16_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c32_s64 (x0, x1, 2),
+ pn0 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c32_s64 (x0, x1, 4),
+ pn7 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c32_s64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c32_s64 (x0, x1, 4),
+ pn15 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c32_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c32_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c32_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c32_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c32_u64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c32_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c32_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c64_s64 (x0, x1, 2),
+ pn0 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c64_s64 (x0, x1, 4),
+ pn7 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c64_s64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c64_s64 (x0, x1, 4),
+ pn15 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c64_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c64_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c64_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c64_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c64_u64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c64_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c64_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c8_s64 (x0, x1, 2),
+ pn0 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c8_s64 (x0, x1, 4),
+ pn7 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c8_s64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c8_s64 (x0, x1, 4),
+ pn15 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c8_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c8_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c8_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c8_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c8_u64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c8_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c8_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b16_s64_x2 (x0, x1),
+ p1 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b16_s64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b16_s64_x2 (x0, x1),
+ p9 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b16_s64_x2 (x0, x1),
+ p14 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b16_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b16_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b16_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b16_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b16_u64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b16_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b16_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b32_s64_x2 (x0, x1),
+ p1 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b32_s64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b32_s64_x2 (x0, x1),
+ p9 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b32_s64_x2 (x0, x1),
+ p14 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b32_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b32_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b32_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b32_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b32_u64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b32_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b32_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b64_s64_x2 (x0, x1),
+ p1 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b64_s64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b64_s64_x2 (x0, x1),
+ p9 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b64_s64_x2 (x0, x1),
+ p14 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b64_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b64_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b64_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b64_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b64_u64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b64_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b64_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b8_s64_x2 (x0, x1),
+ p1 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b8_s64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b8_s64_x2 (x0, x1),
+ p9 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b8_s64_x2 (x0, x1),
+ p14 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b8_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b8_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b8_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b8_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b8_u64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b8_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b8_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c16_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c16_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c16_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c16_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c16_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c16_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c32_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c32_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c32_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c32_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c32_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c32_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c64_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c64_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c64_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c64_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c64_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c64_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c8_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c8_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c8_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c8_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c8_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c8_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c8_u64 (x0, 5, 4))