}
};
+class svfirst_lastp_impl : public function_base
+{
+public:
+ CONSTEXPR svfirst_lastp_impl (bool first)
+ : m_first (first)
+ {}
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ tree pg = gimple_call_arg (f.call, 0);
+ tree pn = gimple_call_arg (f.call, 1);
+
+ gcc_assert (TYPE_MODE (TREE_TYPE (pg)) == TYPE_MODE (TREE_TYPE (pn)));
+
+ if (is_pfalse (pg) || is_pfalse (pn))
+ return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
+
+ if (TREE_CODE (pg) != VECTOR_CST
+ || TREE_CODE (pn) != VECTOR_CST)
+ return NULL;
+
+ HOST_WIDE_INT nelts_full_vector = aarch64_fold_sve_cnt_pat (AARCH64_SV_ALL,
+ f.elements_per_vq (0));
+ if (!m_first && nelts_full_vector < 0)
+ return NULL;
+
+ tree pa = fold_build2 (BIT_AND_EXPR, TREE_TYPE (pg), pg, pn);
+ gcc_assert (TREE_CODE (pa) == VECTOR_CST);
+
+ int elt_size = f.type_suffix (0).element_bytes;
+ unsigned int nelts = vector_cst_encoded_nelts (pa);
+ for (unsigned int i = 0; i < nelts; i++)
+ {
+ unsigned int idx = m_first ? i : nelts - 1 - i;
+ if (tree_to_shwi (VECTOR_CST_ENCODED_ELT (pa, idx)) != 0)
+ return f.fold_call_to (build_int_cst (TREE_TYPE (f.lhs),
+ m_first
+ ? i / elt_size
+ : (nelts_full_vector - 1
+ - i / elt_size)));
+ }
+
+ return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (m_first ? code_for_aarch64_pred_firstp (mode)
+ : code_for_aarch64_pred_lastp (mode));
+ }
+
+private:
+ /* True for svfirstp, false for svlastp. */
+ bool m_first;
+};
+
class svld1q_gather_impl : public full_width_access
{
public:
FUNCTION (sveorqv, reduction, (UNSPEC_EORQV, UNSPEC_EORQV, -1))
FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1))
FUNCTION (svextq, svextq_impl,)
+FUNCTION (svfirstp, svfirst_lastp_impl, (true))
FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1))
FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1))
FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),)
FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),)
FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB,
UNSPEC_UHSUB, -1))
+FUNCTION (svlastp, svfirst_lastp_impl, (false))
FUNCTION (svld1q_gather, svld1q_gather_impl,)
FUNCTION (svld1udq, svld1uxq_impl, (VNx1DImode))
FUNCTION (svld1uwq, svld1uxq_impl, (VNx1SImode))
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
+DEF_SVE_FUNCTION (svfirstp, count_pred, all_pred, implicit)
+DEF_SVE_FUNCTION (svlastp, count_pred, all_pred, implicit)
DEF_SVE_FUNCTION (svrint32x, unary, sd_float, mxz)
DEF_SVE_FUNCTION (svrint32z, unary, sd_float, mxz)
DEF_SVE_FUNCTION (svrint64x, unary, sd_float, mxz)
extern const function_base *const sveorqv;
extern const function_base *const sveortb;
extern const function_base *const svextq;
+ extern const function_base *const svfirstp;
extern const function_base *const svhadd;
extern const function_base *const svhistcnt;
extern const function_base *const svhistseg;
extern const function_base *const svhsub;
extern const function_base *const svhsubr;
+ extern const function_base *const svlastp;
extern const function_base *const svld1q_gather;
extern const function_base *const svld1udq;
extern const function_base *const svld1uwq;
;; ---- [PRED] Predicate extraction
;; ---- [PRED] Predicate selection
;; ---- [PRED] Predicate count
+;; ---- [PRED] Predicate first/last true element
;;
;; == Uniform unary arithmnetic
;; ---- [FP] General unary arithmetic that maps to unspecs
[(set_attr "sve_type" "sve_pred_cnt_scalar")]
)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate first/last true element
+;; -------------------------------------------------------------------------
+;; Includes
+;; - FIRSTP (predicate first true element) (SVE2p2, SME2p2)
+;; - LASTP (predicate last true element) (SVE2p2, SME2p2)
+;; -------------------------------------------------------------------------
+
+;; Count the number of set bits in a predicate. Operand 3 is true if
+;; operand 1 is known to be all-true.
+(define_insn "@aarch64_pred_firstp<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ UNSPEC_FIRSTP))]
+ "TARGET_SVE2p2_OR_SME2p2"
+ "firstp\t%x0, %1, %2.<Vetype>"
+ [(set_attr "sve_type" "sve_pred_cnt_scalar")]
+)
+
+;; Count the number of set bits in a predicate. Operand 3 is true if
+;; operand 1 is known to be all-true.
+(define_insn "@aarch64_pred_lastp<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ UNSPEC_LASTP))]
+ "TARGET_SVE2p2_OR_SME2p2"
+ "lastp\t%x0, %1, %2.<Vetype>"
+ [(set_attr "sve_type" "sve_pred_cnt_scalar")]
+)
+
;; =========================================================================
;; == Uniform unary arithmnetic
;; =========================================================================
UNSPEC_FCVT ; Used in aarch64-sve2.md.
UNSPEC_FCVTNB ; Used in aarch64-sve2.md.
UNSPEC_FCVTNT ; Used in aarch64-sve2.md.
+ UNSPEC_FIRSTP ; Used in aarch64-sve2.md.
UNSPEC_FMAXNMP ; Used in aarch64-sve2.md.
UNSPEC_FMAXP ; Used in aarch64-sve2.md.
UNSPEC_FMINNMP ; Used in aarch64-sve2.md.
UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md.
UNSPEC_HISTCNT ; Used in aarch64-sve2.md.
UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
+ UNSPEC_LASTP ; Used in aarch64-sve2.md.
UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md.
UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md.
UNSPEC_MATCH ; Used in aarch64-sve2.md.
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b16_32:
+** firstp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (firstp_b16_32, uint32_t,
+ x0 = svfirstp_b16 (p0, p1));
+
+/*
+** firstp_b16_64:
+** firstp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (firstp_b16_64, uint64_t,
+ x0 = svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_32_general_x0:
+** firstp x([0-9]+), p0, p1\.h
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_32_general_x0, uint32_t,
+ x0 += svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_32_general_x1:
+** firstp x([0-9]+), p0, p1\.h
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.h
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_64_general_x0, uint64_t,
+ x0 += svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.h
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_32_general_x0:
+** firstp x([0-9]+), p0, p1\.h
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_32_general_x0, uint32_t,
+ x0 -= svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_32_general_x1:
+** firstp x([0-9]+), p0, p1\.h
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.h
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_64_general_x0, uint64_t,
+ x0 -= svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.h
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_u16_general_z0:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z0\.h, \2|\2, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_inc_b16_u16_general_z1:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z1\.h, \2|\2, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_inc_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z0\.h, \3|\3, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
+ z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_inc_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z1\.h, \3|\3, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
+ z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_dec_b16_u16_general_z0:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z0\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_dec_b16_u16_general_z1:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z1\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_dec_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z0\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
+ z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_dec_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z1\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
+ z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b32_32:
+** firstp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (firstp_b32_32, uint32_t,
+ x0 = svfirstp_b32 (p0, p1));
+
+/*
+** firstp_b32_64:
+** firstp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (firstp_b32_64, uint64_t,
+ x0 = svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_32_general_x0:
+** firstp x([0-9]+), p0, p1\.s
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_32_general_x0, uint32_t,
+ x0 += svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_32_general_x1:
+** firstp x([0-9]+), p0, p1\.s
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.s
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_64_general_x0, uint64_t,
+ x0 += svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.s
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_32_general_x0:
+** firstp x([0-9]+), p0, p1\.s
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_32_general_x0, uint32_t,
+ x0 -= svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_32_general_x1:
+** firstp x([0-9]+), p0, p1\.s
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.s
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_64_general_x0, uint64_t,
+ x0 -= svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.s
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_u32_general_z0:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z0\.s, \2|\2, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_inc_b32_u32_general_z1:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z1\.s, \2|\2, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_inc_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z0\.s, \3|\3, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
+ z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_inc_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z1\.s, \3|\3, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
+ z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_dec_b32_u32_general_z0:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z0\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_dec_b32_u32_general_z1:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z1\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_dec_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z0\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
+ z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_dec_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z1\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
+ z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b64_32:
+** firstp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (firstp_b64_32, uint32_t,
+ x0 = svfirstp_b64 (p0, p1));
+
+/*
+** firstp_b64_64:
+** firstp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (firstp_b64_64, uint64_t,
+ x0 = svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_32_general_x0:
+** firstp x([0-9]+), p0, p1\.d
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_32_general_x0, uint32_t,
+ x0 += svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_32_general_x1:
+** firstp x([0-9]+), p0, p1\.d
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.d
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_64_general_x0, uint64_t,
+ x0 += svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.d
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_32_general_x0:
+** firstp x([0-9]+), p0, p1\.d
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_32_general_x0, uint32_t,
+ x0 -= svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_32_general_x1:
+** firstp x([0-9]+), p0, p1\.d
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.d
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_64_general_x0, uint64_t,
+ x0 -= svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.d
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_u64_general_z0:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z0\.d, \2|\2, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_inc_b64_u64_general_z1:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z1\.d, \2|\2, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_inc_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z0\.d, \3|\3, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
+ z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_inc_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z1\.d, \3|\3, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
+ z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_dec_b64_u64_general_z0:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_dec_b64_u64_general_z1:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z1\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_dec_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z0\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
+ z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_dec_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z1\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
+ z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b8_32:
+** firstp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (firstp_b8_32, uint32_t,
+ x0 = svfirstp_b8 (p0, p1));
+
+/*
+** firstp_b8_64:
+** firstp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (firstp_b8_64, uint64_t,
+ x0 = svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_32_general_x0:
+** firstp x([0-9]+), p0, p1\.b
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_32_general_x0, uint32_t,
+ x0 += svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_32_general_x1:
+** firstp x([0-9]+), p0, p1\.b
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.b
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_64_general_x0, uint64_t,
+ x0 += svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.b
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_32_general_x0:
+** firstp x([0-9]+), p0, p1\.b
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_32_general_x0, uint32_t,
+ x0 -= svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_32_general_x1:
+** firstp x([0-9]+), p0, p1\.b
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.b
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_64_general_x0, uint64_t,
+ x0 -= svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.b
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_u8_general_z0:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z0\.b, \2|\2, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_inc_b8_u8_general_z1:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z1\.b, \2|\2, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_inc_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z0\.b, \3|\3, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
+ z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_inc_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z1\.b, \3|\3, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
+ z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_dec_b8_u8_general_z0:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z0\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_dec_b8_u8_general_z1:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z1\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_dec_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z0\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
+ z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_dec_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z1\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
+ z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b16_32:
+** lastp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (lastp_b16_32, uint32_t,
+ x0 = svlastp_b16 (p0, p1));
+
+/*
+** lastp_b16_64:
+** lastp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (lastp_b16_64, uint64_t,
+ x0 = svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_32_general_x0:
+** lastp x([0-9]+), p0, p1\.h
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_32_general_x0, uint32_t,
+ x0 += svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_32_general_x1:
+** lastp x([0-9]+), p0, p1\.h
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.h
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_64_general_x0, uint64_t,
+ x0 += svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.h
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_32_general_x0:
+** lastp x([0-9]+), p0, p1\.h
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_32_general_x0, uint32_t,
+ x0 -= svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_32_general_x1:
+** lastp x([0-9]+), p0, p1\.h
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.h
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_64_general_x0, uint64_t,
+ x0 -= svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.h
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_u16_general_z0:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z0\.h, \2|\2, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_inc_b16_u16_general_z1:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z1\.h, \2|\2, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_inc_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z0\.h, \3|\3, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
+ z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_inc_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z1\.h, \3|\3, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
+ z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_dec_b16_u16_general_z0:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z0\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_dec_b16_u16_general_z1:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z1\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_dec_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z0\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
+ z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_dec_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z1\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
+ z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b32_32:
+** lastp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (lastp_b32_32, uint32_t,
+ x0 = svlastp_b32 (p0, p1));
+
+/*
+** lastp_b32_64:
+** lastp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (lastp_b32_64, uint64_t,
+ x0 = svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_32_general_x0:
+** lastp x([0-9]+), p0, p1\.s
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_32_general_x0, uint32_t,
+ x0 += svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_32_general_x1:
+** lastp x([0-9]+), p0, p1\.s
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.s
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_64_general_x0, uint64_t,
+ x0 += svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.s
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_32_general_x0:
+** lastp x([0-9]+), p0, p1\.s
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_32_general_x0, uint32_t,
+ x0 -= svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_32_general_x1:
+** lastp x([0-9]+), p0, p1\.s
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.s
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_64_general_x0, uint64_t,
+ x0 -= svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.s
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_u32_general_z0:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z0\.s, \2|\2, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_inc_b32_u32_general_z1:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z1\.s, \2|\2, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_inc_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z0\.s, \3|\3, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
+ z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_inc_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z1\.s, \3|\3, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
+ z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_dec_b32_u32_general_z0:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z0\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_dec_b32_u32_general_z1:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z1\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_dec_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z0\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
+ z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_dec_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z1\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
+ z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b64_32:
+** lastp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (lastp_b64_32, uint32_t,
+ x0 = svlastp_b64 (p0, p1));
+
+/*
+** lastp_b64_64:
+** lastp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (lastp_b64_64, uint64_t,
+ x0 = svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_32_general_x0:
+** lastp x([0-9]+), p0, p1\.d
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_32_general_x0, uint32_t,
+ x0 += svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_32_general_x1:
+** lastp x([0-9]+), p0, p1\.d
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.d
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_64_general_x0, uint64_t,
+ x0 += svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.d
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_32_general_x0:
+** lastp x([0-9]+), p0, p1\.d
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_32_general_x0, uint32_t,
+ x0 -= svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_32_general_x1:
+** lastp x([0-9]+), p0, p1\.d
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.d
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_64_general_x0, uint64_t,
+ x0 -= svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.d
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_u64_general_z0:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z0\.d, \2|\2, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_inc_b64_u64_general_z1:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z1\.d, \2|\2, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_inc_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z0\.d, \3|\3, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
+ z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_inc_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z1\.d, \3|\3, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
+ z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_dec_b64_u64_general_z0:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_dec_b64_u64_general_z1:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z1\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_dec_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z0\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
+ z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_dec_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z1\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
+ z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b8_32:
+** lastp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (lastp_b8_32, uint32_t,
+ x0 = svlastp_b8 (p0, p1));
+
+/*
+** lastp_b8_64:
+** lastp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (lastp_b8_64, uint64_t,
+ x0 = svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_32_general_x0:
+** lastp x([0-9]+), p0, p1\.b
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_32_general_x0, uint32_t,
+ x0 += svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_32_general_x1:
+** lastp x([0-9]+), p0, p1\.b
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.b
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_64_general_x0, uint64_t,
+ x0 += svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.b
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_32_general_x0:
+** lastp x([0-9]+), p0, p1\.b
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_32_general_x0, uint32_t,
+ x0 -= svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_32_general_x1:
+** lastp x([0-9]+), p0, p1\.b
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.b
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_64_general_x0, uint64_t,
+ x0 -= svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.b
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_u8_general_z0:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z0\.b, \2|\2, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_inc_b8_u8_general_z1:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z1\.b, \2|\2, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_inc_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z0\.b, \3|\3, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
+ z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_inc_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z1\.b, \3|\3, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
+ z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_dec_b8_u8_general_z0:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z0\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_dec_b8_u8_general_z1:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z1\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_dec_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z0\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
+ z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_dec_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z1\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
+ z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p2"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test1 ()
+{
+ return svfirstp_b8 (svptrue_b8 (),
+ svptrue_b8 ());
+}
+
+/*
+** test2:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test2 ()
+{
+ return svfirstp_b8 (svpfalse_b (),
+ svptrue_b8 ());
+}
+
+/*
+** test3:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test3 ()
+{
+ return svfirstp_b8 (svptrue_b8 (),
+ svpfalse_b ());
+}
+
+/*
+** test4:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test4 ()
+{
+ return svfirstp_b8 (svdupq_n_b8 (false, false, false, false,
+ false, false, false, false,
+ false, false, false, false,
+ false, false, false, true),
+ svptrue_b8 ());
+}
+
+/*
+** test5:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test5 ()
+{
+ return svfirstp_b16 (svptrue_b16 (),
+ svptrue_b16 ());
+}
+
+/*
+** test6:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test6 ()
+{
+ return svfirstp_b16 (svpfalse_b (),
+ svptrue_b16 ());
+}
+
+/*
+** test7:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test7 ()
+{
+ return svfirstp_b16 (svptrue_b16 (),
+ svpfalse_b ());
+}
+
+/*
+** test8:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test8 ()
+{
+ return svfirstp_b16 (svdupq_n_b16 (false, false, false, false,
+ false, false, false, true),
+ svptrue_b16 ());
+}
+
+/*
+** test9:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test9 ()
+{
+ return svfirstp_b32 (svptrue_b32 (),
+ svptrue_b32 ());
+}
+
+/*
+** test10:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test10 ()
+{
+ return svfirstp_b32 (svpfalse_b (),
+ svptrue_b32 ());
+}
+
+/*
+** test11:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test11 ()
+{
+ return svfirstp_b32 (svptrue_b32 (),
+ svpfalse_b ());
+}
+
+/*
+** test12:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test12 ()
+{
+ return svfirstp_b32 (svdupq_n_b32 (false, false, false, true),
+ svptrue_b32 ());
+}
+
+/*
+** test13:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test13 ()
+{
+ return svfirstp_b64 (svptrue_b64 (),
+ svptrue_b64 ());
+}
+
+/*
+** test14:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test14 ()
+{
+ return svfirstp_b64 (svpfalse_b (),
+ svptrue_b64 ());
+}
+
+/*
+** test15:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test15 ()
+{
+ return svfirstp_b64 (svptrue_b64 (),
+ svpfalse_b ());
+}
+
+/*
+** test16:
+** mov x0, 1
+** ret
+*/
+uint64_t
+test16 ()
+{
+ return svfirstp_b64 (svdupq_n_b64 (false, true),
+ svptrue_b64 ());
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p2"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** mov x0, 31
+** ret
+*/
+uint64_t
+test1 ()
+{
+ return svlastp_b8 (svptrue_b8 (),
+ svptrue_b8 ());
+}
+
+/*
+** test2:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test2 ()
+{
+ return svlastp_b8 (svpfalse_b (),
+ svptrue_b8 ());
+}
+
+/*
+** test3:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test3 ()
+{
+ return svlastp_b8 (svptrue_b8 (),
+ svpfalse_b ());
+}
+
+/*
+** test4:
+** mov x0, 31
+** ret
+*/
+uint64_t
+test4 ()
+{
+ return svlastp_b8 (svdupq_n_b8 (false, false, false, false,
+ false, false, false, false,
+ false, false, false, false,
+ false, false, false, true),
+ svptrue_b8 ());
+}
+
+/*
+** test5:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test5 ()
+{
+ return svlastp_b16 (svptrue_b16 (),
+ svptrue_b16 ());
+}
+
+/*
+** test6:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test6 ()
+{
+ return svlastp_b16 (svpfalse_b (),
+ svptrue_b16 ());
+}
+
+/*
+** test7:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test7 ()
+{
+ return svlastp_b16 (svptrue_b16 (),
+ svpfalse_b ());
+}
+
+/*
+** test8:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test8 ()
+{
+ return svlastp_b16 (svdupq_n_b16 (false, false, false, false,
+ false, false, false, true),
+ svptrue_b16 ());
+}
+
+/*
+** test9:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test9 ()
+{
+ return svlastp_b32 (svptrue_b32 (),
+ svptrue_b32 ());
+}
+
+/*
+** test10:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test10 ()
+{
+ return svlastp_b32 (svpfalse_b (),
+ svptrue_b32 ());
+}
+
+/*
+** test11:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test11 ()
+{
+ return svlastp_b32 (svptrue_b32 (),
+ svpfalse_b ());
+}
+
+/*
+** test12:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test12 ()
+{
+ return svlastp_b32 (svdupq_n_b32 (false, false, false, true),
+ svptrue_b32 ());
+}
+
+/*
+** test13:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test13 ()
+{
+ return svlastp_b64 (svptrue_b64 (),
+ svptrue_b64 ());
+}
+
+/*
+** test14:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test14 ()
+{
+ return svlastp_b64 (svpfalse_b (),
+ svptrue_b64 ());
+}
+
+/*
+** test15:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test15 ()
+{
+ return svlastp_b64 (svptrue_b64 (),
+ svpfalse_b ());
+}
+
+/*
+** test16:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test16 ()
+{
+ return svlastp_b64 (svdupq_n_b64 (false, true),
+ svptrue_b64 ());
+}
+
+#ifdef __cplusplus
+}
+#endif