aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
+ aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
;; ---- Single-vector stores
;; ---- Table stores
;; ---- Single-vector moves
+;; ---- Multi-vector moves
;; ---- Zeroing
;;
;; == Binary arithmetic
;; ---- Binary arithmetic on ZA tile
+;; ---- Binary arithmetic on ZA slice
+;; ---- Binary arithmetic, writing to ZA slice
;;
;; == Ternary arithmetic
+;; ---- [INT] Dot product
+;; ---- [INT] Ternary widening arithmetic on ZA slice
;; ---- [INT] Sum of outer products
+;; ---- [FP] Dot product
+;; ---- [FP] Ternary arithmetic on ZA slice
+;; ---- [FP] Ternary widening arithmetic on ZA slice
;; ---- [FP] Sum of outer products
+;;
+;; == Table lookup
+;; ---- Table lookup
;; =========================================================================
;; == State management
"mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
)
+;; -------------------------------------------------------------------------
+;; ---- Multi-vector moves
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOVA
+;; -------------------------------------------------------------------------
+
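+;; The SVE_FULLx24 patterns move 2 or 4 consecutive tile slices to or from
+;; an aligned Z register tuple, with operand 1 selecting the tile and <hv>
+;; the slice direction; the SVE_DIx24 patterns handle the "vgx" forms that
+;; view the ZA array as .d vectors.  The "*..._plus" variants fold in a
+;; constant slice offset, which must be a multiple of the vector count and
+;; less than 128 / <elem_bits>.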
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (match_operand:SI 2 "register_operand" "Ucj")]
+ SME_READ))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[3] = GEN_INT (<vector_count> - 1);
+ return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
+ }
+)
+
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (plus:SI
+ (match_operand:SI 2 "register_operand" "Ucj")
+ (match_operand:SI 3 "const_int_operand"))]
+ SME_READ))]
+ "TARGET_STREAMING_SME2
+ && UINTVAL (operands[3]) % <vector_count> == 0
+ && UINTVAL (operands[3]) < 128 / <elem_bits>"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
+ return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
+ }
+)
+
+(define_insn "@aarch64_sme_read<mode>"
+ [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 1 "register_operand" "Uci")]
+ UNSPEC_SME_READ))]
+ "TARGET_STREAMING_SME2"
+ "mova\t%0, za.d[%w1, 0, vgx<vector_count>]"
+)
+
+(define_insn "*aarch64_sme_read<mode>_plus"
+ [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:SI 2 "const_0_to_7_operand"))]
+ UNSPEC_SME_READ))]
+ "TARGET_STREAMING_SME2"
+ "mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
+)
+
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+ [(set (reg:SVE_FULLx24 ZA_REGNUM)
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:SI 1 "register_operand" "Ucj")
+ (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_WRITE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[3] = GEN_INT (<vector_count> - 1);
+ return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2";
+ }
+)
+
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+ [(set (reg:SVE_FULLx24 ZA_REGNUM)
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "Ucj")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_WRITE))]
+ "TARGET_STREAMING_SME2
+ && UINTVAL (operands[2]) % <vector_count> == 0
+ && UINTVAL (operands[2]) < 128 / <elem_bits>"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1);
+ return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3";
+ }
+)
+
+(define_insn "@aarch64_sme_write<mode>"
+ [(set (reg:SVE_DIx24 ZA_REGNUM)
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")]
+ UNSPEC_SME_WRITE))]
+ "TARGET_STREAMING_SME2"
+ "mova\tza.d[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_write<mode>_plus"
+ [(set (reg:SVE_DIx24 ZA_REGNUM)
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ UNSPEC_SME_WRITE))]
+ "TARGET_STREAMING_SME2"
+ "mova\tza.d[%w0, %1, vgx<vector_count>], %2"
+)
+
;; -------------------------------------------------------------------------
;; ---- Zeroing
;; -------------------------------------------------------------------------
}
)
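+
+;; ZT0 is modelled as a single V8DI register.  As in the other ZA/ZT0
+;; patterns in this file, the (use ...) of SME_STATE_REGNUM records the
+;; dependence on the current SME state.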
+(define_insn "aarch64_sme_zero_zt0"
+ [(set (reg:V8DI ZT0_REGNUM)
+ (const_int 0))
+ (use (reg:DI SME_STATE_REGNUM))]
+ "TARGET_SME2"
+ "zero\t{ zt0 }"
+)
+
;; =========================================================================
;; == Binary arithmetic
;; =========================================================================
"<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- Binary arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
+)
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDFx24
+ [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_SLICE_SDF))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+ [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDFx24
+ [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_SLICE_SDF))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Binary arithmetic, writing to ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; - SUB
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_WRITE_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_BINARY_WRITE_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+(define_insn "@aarch64_sme_single_<optab><mode>"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDIx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_SDIx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SME_BINARY_WRITE_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
+)
+
+(define_insn "*aarch64_sme_single_<optab><mode>_plus"
+ [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (unspec:SME_ZA_SDIx24
+ [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDIx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_SDIx24
+ (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+ SME_BINARY_WRITE_SLICE_SDI))]
+ "TARGET_STREAMING_SME2"
+ "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
+)
+
;; =========================================================================
;; == Ternary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
-;; ---- [INT] Sum of outer products
+;; ---- [INT] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDOT
+;; - SUDOT
+;; - UDOT
+;; - USDOT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_DOTPROD))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_DOTPROD))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_BHIx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SME_INT_DOTPROD))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
+)
+
+(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_BHIx24
+ (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+ SME_INT_DOTPROD))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"
+)
+
+;; SUDOT is USDOT with the operands swapped.
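+;; The duplicated single-vector operand therefore appears first in the
+;; unspec below, matching USDOT's operand order, even though the assembly
+;; output prints the tuple operand first.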
+(define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (vec_duplicate:SME_ZA_BIx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))
+ (match_operand:SME_ZA_BIx24 1 "register_operand" "w")]
+ UNSPEC_SME_USDOT))]
+ "TARGET_STREAMING_SME2"
+ "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b"
+)
+
+(define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (vec_duplicate:SME_ZA_BIx24
+ (match_operand:<VSINGLE> 3 "register_operand" "x"))
+ (match_operand:SME_ZA_BIx24 2 "register_operand" "w")]
+ UNSPEC_SME_USDOT))]
+ "TARGET_STREAMING_SME2"
+ "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b"
+)
+
+(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_BHIx24
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_DOTPROD_LANE))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_BHIx24
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_DOTPROD_LANE))]
+ "TARGET_STREAMING_SME2
+ && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+ && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+ "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Ternary widening arithmetic on ZA slice
;; -------------------------------------------------------------------------
;; Includes:
+;; - SMLA
+;; - SMLS
+;; - UMLA
+;; - UMLS
+;; -------------------------------------------------------------------------
+
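+;; <za32_long> expands to "ll" for 8-bit inputs and "l" for 16-bit inputs,
+;; and <za32_last_offset> to the matching 3 or 1, so these patterns cover
+;; both the four-slice *MLALL forms and the two-slice *MLAL forms.
+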
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SVE_FULL_BHI 1 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 2 "register_operand" "x")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 3 "register_operand" "x")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+ return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>";
+ }
+)
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+ return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
+ }
+)
+
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_BHIx24
+ (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
+)
+
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_BHIx24
+ (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+ return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>";
+ }
+)
+
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_BHIx124
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_BHIx124
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+ return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]";
+ }
+)
+
+(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:VNx8HI_ONLY 1 "register_operand" "w")
+ (match_operand:VNx8HI_ONLY 2 "register_operand" "x")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+ (match_operand:VNx8HI_ONLY 2 "register_operand" "w")
+ (match_operand:VNx8HI_ONLY 3 "register_operand" "x")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+ return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h";
+ }
+)
+
+(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+ (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+ return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3";
+ }
+)
+
+(define_insn "@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HIx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HIx24
+ (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h"
+)
+
+(define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+ (match_operand:SME_ZA_HIx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HIx24
+ (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+ return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
+ }
+)
+
+(define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_HIx124
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
+ [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+ (unspec:VNx2DI_ONLY
+ [(reg:VNx2DI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+ (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_HIx124
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_INT_TERNARY_SLICE))]
+ "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+ {
+ operands[5] = GEN_INT (INTVAL (operands[1]) + 3);
+ return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of outer products
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BMOPA
+;; - BMOPS
;; - SMOPA
;; - SMOPS
;; - SUMOPA
"<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
)
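+
+;; SME2 adds two-way 16-bit to 32-bit integer MOPA/MOPS forms and the
+;; bitwise BMOPA/BMOPS forms, both of which accumulate into .s tiles.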
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
+ (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
+ (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
+ (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
+ SME2_INT_MOP))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h"
+)
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
+ (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
+ (match_operand:VNx4SI_ONLY 3 "register_operand" "w")
+ (match_operand:VNx4SI_ONLY 4 "register_operand" "w")]
+ SME2_BMOP))]
+ "TARGET_STREAMING_SME2"
+ "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFDOT
+;; - FDOT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_DOTPROD))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_DOTPROD))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HFx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SME_FP_DOTPROD))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h"
+)
+
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HFx24
+ (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+ SME_FP_DOTPROD))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h"
+)
+
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_HFx24
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_DOTPROD_LANE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_HFx24
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_DOTPROD_LANE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Ternary arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLA
+;; - FMLS
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_SDFx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>"
+)
+
+(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_SDFx24
+ (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>"
+)
+
+(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_SDFx24
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+ [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+ (unspec:SME_ZA_SDF_I
+ [(reg:SME_ZA_SDF_I ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))
+ (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (unspec:SME_ZA_SDFx24
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_SME2
+ && TARGET_STREAMING_SME
+ && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+ "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Ternary widening arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFMLAL
+;; - BFMLSL
+;; - FMLAL
+;; - FMLSL
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SVE_FULL_HF 1 "register_operand" "w")
+ (match_operand:SVE_FULL_HF 2 "register_operand" "x")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SVE_FULL_HF 2 "register_operand" "w")
+ (match_operand:SVE_FULL_HF 3 "register_operand" "x")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+ return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h";
+ }
+)
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+ return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
+ }
+)
+
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HFx24
+ (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h"
+)
+
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
+ (vec_duplicate:SME_ZA_HFx24
+ (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+ return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
+ }
+)
+
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_HFx124
+ [(match_operand:<VSINGLE> 2 "register_operand" "x")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]"
+)
+
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
+ [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+ (unspec:VNx4SI_ONLY
+ [(reg:VNx4SI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+ (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>")
+ (unspec:SME_ZA_HFx124
+ [(match_operand:<VSINGLE> 3 "register_operand" "x")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SME_FP_TERNARY_SLICE))]
+ "TARGET_STREAMING_SME2"
+ {
+ operands[5] = GEN_INT (INTVAL (operands[1]) + 1);
+ return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Sum of outer products
;; -------------------------------------------------------------------------
&& (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
"<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
)
+
+;; =========================================================================
+;; == Table lookup
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+ UNSPEC_SME_LUTI
+])
+
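+;; The first pattern below handles the single-vector result forms, the
+;; second the 2- and 4-register result forms.  The 4-register LUTI4 form
+;; is excluded for 8-bit elements, for which no encoding exists.
+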
+(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
+ [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w")
+ (unspec:SVE_FULL_BHS
+ [(reg:V8DI ZT0_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:VNx16QI 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SME_LUTI))]
+ "TARGET_STREAMING_SME2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]"
+)
+
+(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
+ [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_BHSx24
+ [(reg:V8DI ZT0_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:VNx16QI 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SME_LUTI))]
+ "TARGET_STREAMING_SME2
+ && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
+ "luti<LUTI_BITS>\t%0, zt0, %1[%2]"
+)
{
machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
- return e.map_to_rtx_codes (AND, AND, -1);
+ return e.map_to_rtx_codes (AND, AND, -1, -1);
}
if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
rtx
expand (function_expander &e) const override
{
+ if (e.type_suffix (0).tclass == TYPE_count)
+ {
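+ /* Predicate-as-counter (svcount_t) operands use the _c form of CNTP,
+ which is selected by element size rather than by mode.  */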
+ unsigned int bits = e.type_suffix (0).element_bits;
+ return e.use_exact_insn (code_for_aarch64_sve_cntp_c (bits));
+ }
+
machine_mode mode = e.vector_mode (0);
e.add_ptrue_hint (0, mode);
return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
rtx
expand (function_expander &e) const override
{
+ insn_code icode;
+ if (e.pred == PRED_none)
+ {
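+ /* For the unpredicated (tuple) forms, choose the optab from the type
+ suffixes: float-to-integer uses the fix optabs, integer-to-float the
+ float optabs, and float-to-float the narrowing trunc optab.  */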
+ machine_mode mode0 = e.result_mode ();
+ machine_mode mode1 = GET_MODE (e.args[0]);
+ convert_optab optab;
+ if (e.type_suffix (0).integer_p)
+ optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
+ else if (e.type_suffix (1).integer_p)
+ optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab;
+ else
+ optab = trunc_optab;
+ icode = convert_optab_handler (optab, mode0, mode1);
+ gcc_assert (icode != CODE_FOR_nothing);
+ return e.use_exact_insn (icode);
+ }
machine_mode mode0 = e.vector_mode (0);
machine_mode mode1 = e.vector_mode (1);
- insn_code icode;
/* All this complication comes from the need to select four things
simultaneously:
/* In the optab, the multiplication operands come before the accumulator
operand. The optab is keyed off the multiplication mode. */
e.rotate_inputs_left (0, 3);
- insn_code icode
- = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
- 0, GET_MODE (e.args[0]));
+ insn_code icode;
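+ /* Forms with a second type suffix (f32 from f16, s32 from s16, u32 from
+ u16) map to fixed-mode patterns; the original single-suffix forms keep
+ using the dot_prod optabs.  */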
+ if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
+ icode = e.direct_optab_handler_for_sign (sdot_prod_optab,
+ udot_prod_optab,
+ 0, GET_MODE (e.args[0]));
+ else
+ icode = (e.type_suffix (0).float_p
+ ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
+ : e.type_suffix (0).unsigned_p
+ ? CODE_FOR_aarch64_sve_udotvnx4sivnx8hi
+ : CODE_FOR_aarch64_sve_sdotvnx4sivnx8hi);
return e.use_unpred_insn (icode);
}
};
rtx
expand (function_expander &e) const override
{
+ machine_mode mode0 = GET_MODE (e.args[0]);
+ machine_mode mode1 = GET_MODE (e.args[1]);
/* Use the same ordering as the dot_prod_optab, with the
accumulator last. */
e.rotate_inputs_left (0, 4);
int unspec = unspec_for (e);
- machine_mode mode = e.vector_mode (0);
- return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
+ insn_code icode;
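+ /* The f32-from-f16 FDOT lane form has a single fixed-mode pattern; the
+ integer forms are parameterized by the accumulator and multiplicand
+ modes captured above.  */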
+ if (unspec == UNSPEC_FDOT)
+ icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf;
+ else
+ icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1);
+ return e.use_exact_insn (icode);
}
};
with an extra argument on the end. Take the inactive elements
from this extra argument. */
e.rotate_inputs_left (0, 4);
- return e.map_to_rtx_codes (AND, AND, -1, 3);
+ return e.map_to_rtx_codes (AND, AND, -1, -1, 3);
}
machine_mode wide_mode = e.vector_mode (0);
gimple *
fold (gimple_folder &f) const override
{
+ if (f.vectors_per_tuple () != 1)
+ return nullptr;
+
tree vectype = f.vector_type (0);
/* Get the predicate and base pointer. */
rtx
expand (function_expander &e) const override
{
- insn_code icode = convert_optab_handler (maskload_optab,
- e.vector_mode (0), e.gp_mode (0));
+ insn_code icode;
+ if (e.vectors_per_tuple () == 1)
+ icode = convert_optab_handler (maskload_optab,
+ e.vector_mode (0), e.gp_mode (0));
+ else
+ icode = code_for_aarch64_ld1 (e.tuple_mode (0));
return e.use_contiguous_load_insn (icode);
}
};
rtx
expand (function_expander &e) const override
{
- insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
+ insn_code icode = code_for_aarch64_ldnt1 (e.tuple_mode (0));
return e.use_contiguous_load_insn (icode);
}
};
gimple *
fold (gimple_folder &f) const override
{
- return f.fold_to_pfalse ();
+ if (f.type_suffix (0).tclass == TYPE_bool)
+ return f.fold_to_pfalse ();
+
+ return nullptr;
}
rtx
gimple *
fold (gimple_folder &f) const override
{
- return f.fold_to_ptrue ();
+ if (f.type_suffix (0).tclass == TYPE_bool)
+ return f.fold_to_ptrue ();
+
+ return nullptr;
}
rtx
expand (function_expander &e) const override
{
- return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
+ if (e.type_suffix (0).tclass == TYPE_bool)
+ return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
+
+ auto bits = e.type_suffix (0).element_bits;
+ return e.use_exact_insn (code_for_aarch64_sve_ptrue_c (bits));
}
};
}
};
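+/* Implements svrinta, svrinti, svrintm, svrintn, svrintp, svrintx and
+ svrintz.  The unpredicated (tuple) forms map directly to the associated
+ rounding optab; the predicated forms use the UNSPEC_COND_FRINT* unspecs.  */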
+class svrint_impl : public function_base
+{
+public:
+ CONSTEXPR svrint_impl (optab_tag optab, int cond_unspec)
+ : m_optab (optab), m_cond_unspec (cond_unspec)
+ {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (e.pred == PRED_none)
+ {
+ auto icode = direct_optab_handler (m_optab, e.tuple_mode (0));
+ return e.use_exact_insn (icode);
+ }
+ return e.map_to_unspecs (-1, -1, m_cond_unspec);
+ }
+
+ optab_tag m_optab;
+ int m_cond_unspec;
+};
+
class svsel_impl : public quiet<function_base>
{
public:
gimple *
fold (gimple_folder &f) const override
{
+ if (f.vectors_per_tuple () > 1)
+ return nullptr;
+
/* svsel corresponds exactly to VEC_COND_EXPR. */
gimple_seq stmts = NULL;
tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
{
/* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */
e.rotate_inputs_left (0, 3);
- insn_code icode = convert_optab_handler (vcond_mask_optab,
- e.vector_mode (0),
- e.gp_mode (0));
+ insn_code icode = (e.vectors_per_tuple () > 1
+ ? code_for_aarch64_sve_sel (e.tuple_mode (0))
+ : convert_optab_handler (vcond_mask_optab,
+ e.vector_mode (0),
+ e.gp_mode (0)));
return e.use_exact_insn (icode);
}
};
gimple *
fold (gimple_folder &f) const override
{
+ if (f.vectors_per_tuple () != 1)
+ return nullptr;
+
tree vectype = f.vector_type (0);
/* Get the predicate and base pointer. */
rtx
expand (function_expander &e) const override
{
- insn_code icode = convert_optab_handler (maskstore_optab,
- e.vector_mode (0), e.gp_mode (0));
+ insn_code icode;
+ if (e.vectors_per_tuple () == 1)
+ icode = convert_optab_handler (maskstore_optab,
+ e.vector_mode (0), e.gp_mode (0));
+ else
+ icode = code_for_aarch64_st1 (e.tuple_mode (0));
return e.use_contiguous_store_insn (icode);
}
};
rtx
expand (function_expander &e) const override
{
- insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
+ insn_code icode = code_for_aarch64_stnt1 (e.tuple_mode (0));
return e.use_contiguous_store_insn (icode);
}
};
/* Canonicalize subtractions of constants to additions. */
machine_mode mode = e.vector_mode (0);
if (e.try_negating_argument (2, mode))
- return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);
+ return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD, -1);
return rtx_code_function::expand (e);
}
gimple *
fold (gimple_folder &f) const override
{
+ if (f.vectors_per_tuple () > 1)
+ return nullptr;
+
if (f.type_suffix (1).unsigned_p)
return fold_type<poly_uint64> (f);
else
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
-FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
+FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
+ UNSPEC_FDOT))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
-FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
-FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
+FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX,
+ UNSPEC_FMAX))
+FUNCTION (svmaxnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMAXNM,
+ UNSPEC_FMAXNM))
FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
-FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
-FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
+FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN,
+ UNSPEC_FMIN))
+FUNCTION (svminnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMINNM,
+ UNSPEC_FMINNM))
FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
FUNCTION (svmla, svmla_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
-FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
-FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
-FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
-FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
-FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
-FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
-FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
+FUNCTION (svrinta, svrint_impl, (round_optab, UNSPEC_COND_FRINTA))
+FUNCTION (svrinti, svrint_impl, (nearbyint_optab, UNSPEC_COND_FRINTI))
+FUNCTION (svrintm, svrint_impl, (floor_optab, UNSPEC_COND_FRINTM))
+FUNCTION (svrintn, svrint_impl, (roundeven_optab, UNSPEC_COND_FRINTN))
+FUNCTION (svrintp, svrint_impl, (ceil_optab, UNSPEC_COND_FRINTP))
+FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX))
+FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
DEF_SVE_FUNCTION (svcntw, count_inherent, none, none)
DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none)
DEF_SVE_FUNCTION (svcreate2, create, all_data, none)
+DEF_SVE_FUNCTION (svcreate2, create, b, none)
DEF_SVE_FUNCTION (svcreate3, create, all_data, none)
DEF_SVE_FUNCTION (svcreate4, create, all_data, none)
-DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz)
+DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt, mxz)
DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz)
DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz)
-DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none)
-DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none)
+DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, sd_integer, none)
+DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, sd_integer, none)
DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none)
DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none)
DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none)
DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz)
DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz)
DEF_SVE_FUNCTION (svget2, get, all_data, none)
+DEF_SVE_FUNCTION (svget2, get, b, none)
DEF_SVE_FUNCTION (svget3, get, all_data, none)
DEF_SVE_FUNCTION (svget4, get, all_data, none)
DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none)
DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz)
DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz)
DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz)
+DEF_SVE_FUNCTION (svmax, binary_opt_single_n, all_arith, mxz)
+DEF_SVE_FUNCTION (svmaxnm, binary_opt_single_n, all_float, mxz)
DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit)
DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit)
-DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz)
+DEF_SVE_FUNCTION (svmin, binary_opt_single_n, all_arith, mxz)
+DEF_SVE_FUNCTION (svminnm, binary_opt_single_n, all_float, mxz)
DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit)
DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit)
DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz)
DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z)
DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit)
DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none)
+DEF_SVE_FUNCTION (svpfalse, inherent, c, none)
DEF_SVE_FUNCTION (svpfirst, unary, b, implicit)
DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit)
DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit)
DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz)
DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz)
DEF_SVE_FUNCTION (svrevw, unary, d_integer, mxz)
-DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz)
+DEF_SVE_FUNCTION (svrinta, unaryxn, all_float, mxz)
DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz)
+DEF_SVE_FUNCTION (svrintm, unaryxn, all_float, mxz)
+DEF_SVE_FUNCTION (svrintn, unaryxn, all_float, mxz)
+DEF_SVE_FUNCTION (svrintp, unaryxn, all_float, mxz)
DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz)
DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz)
DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none)
DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none)
DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz)
-DEF_SVE_FUNCTION (svsel, binary, all_data, implicit)
-DEF_SVE_FUNCTION (svsel, binary, b, implicit)
+DEF_SVE_FUNCTION (svsel, binaryxn, all_data, implicit)
+DEF_SVE_FUNCTION (svsel, binaryxn, b, implicit)
DEF_SVE_FUNCTION (svset2, set, all_data, none)
+DEF_SVE_FUNCTION (svset2, set, b, none)
DEF_SVE_FUNCTION (svset3, set, all_data, none)
DEF_SVE_FUNCTION (svset4, set, all_data, none)
DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit)
DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svst1, store, all_data, implicit)
+DEF_SVE_FUNCTION (svst1, storexn, all_data, implicit)
DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit)
DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit)
DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit)
DEF_SVE_FUNCTION (svst2, store, all_data, implicit)
DEF_SVE_FUNCTION (svst3, store, all_data, implicit)
DEF_SVE_FUNCTION (svst4, store, all_data, implicit)
-DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit)
+DEF_SVE_FUNCTION (svstnt1, storexn, all_data, implicit)
DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz)
DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz)
DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none)
DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none)
DEF_SVE_FUNCTION (svundef, inherent, all_data, none)
DEF_SVE_FUNCTION (svundef2, inherent, all_data, none)
+DEF_SVE_FUNCTION (svundef2, inherent, b, none)
DEF_SVE_FUNCTION (svundef3, inherent, all_data, none)
DEF_SVE_FUNCTION (svundef4, inherent, all_data, none)
DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none)
DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none)
DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none)
-DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt_bfloat, mxz)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
#undef REQUIRED_EXTENSIONS
template<typename T>
using write_za = add_call_properties<T, CP_WRITE_ZA>;
+template<typename T>
+using read_zt0 = add_call_properties<T, CP_READ_ZT0>;
+
+template<typename T>
+using write_zt0 = add_call_properties<T, CP_WRITE_ZT0>;
+
/* A function_base that sometimes or always operates on tuples of
vectors. */
class multi_vector_function : public function_base
memory_vector_mode (const function_instance &fi) const override
{
machine_mode mode = fi.vector_mode (0);
- if (m_vectors_per_tuple != 1)
- mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
+ auto vectors_per_tuple = fi.vectors_per_tuple ();
+ if (vectors_per_tuple != 1)
+ mode = targetm.array_mode (mode, vectors_per_tuple).require ();
return mode;
}
};
public:
CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
rtx_code code_for_uint,
- int unspec_for_fp = -1)
+ int unspec_for_cond_fp = -1,
+ int unspec_for_uncond_fp = -1)
: m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
- m_unspec_for_fp (unspec_for_fp) {}
+ m_unspec_for_cond_fp (unspec_for_cond_fp),
+ m_unspec_for_uncond_fp (unspec_for_uncond_fp) {}
/* The rtx code to use for signed and unsigned integers respectively.
Can be UNKNOWN for functions that don't have integer forms. */
/* The UNSPEC_COND_* to use for floating-point operations. Can be -1
for functions that only operate on integers. */
- int m_unspec_for_fp;
+ int m_unspec_for_cond_fp;
+
+ /* The UNSPEC_* to use for unpredicated floating-point operations.
+ Can be -1 if there is no such operation. */
+ int m_unspec_for_uncond_fp;
};
/* A function_base for functions that have an associated rtx code.
expand (function_expander &e) const override
{
return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
- m_unspec_for_fp);
+ m_unspec_for_cond_fp, m_unspec_for_uncond_fp);
}
};
unsigned int nargs = e.args.length ();
e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
- m_unspec_for_fp, nargs - 1);
+ m_unspec_for_cond_fp, m_unspec_for_uncond_fp,
+ nargs - 1);
}
};
expand (function_expander &e) const override
{
return e.use_exact_insn (CODE (unspec_for (e),
- e.vector_mode (m_suffix_index)));
+ e.tuple_mode (m_suffix_index)));
}
};
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve>
+ unspec_based_uncond_function;
+
/* A function that performs an unspec and then adds it to another value. */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
unspec_based_add_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
unspec_based_sub_lane_function;
+/* A function that has conditional and unconditional forms, each of which
+ is associated with a single unspec. */
+class cond_or_uncond_unspec_function : public function_base
+{
+public:
+ CONSTEXPR cond_or_uncond_unspec_function (int cond_unspec, int uncond_unspec)
+ : m_cond_unspec (cond_unspec), m_uncond_unspec (uncond_unspec) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (e.pred == PRED_none)
+ {
+ auto mode = e.tuple_mode (0);
+ auto icode = (e.mode_suffix_id == MODE_single
+ ? code_for_aarch64_sve_single (m_uncond_unspec, mode)
+ : code_for_aarch64_sve (m_uncond_unspec, mode));
+ return e.use_exact_insn (icode);
+ }
+ return e.map_to_unspecs (m_cond_unspec, m_cond_unspec, m_cond_unspec);
+ }
+
+ /* The unspecs for the conditional and unconditional instructions,
+ respectively. */
+ int m_cond_unspec;
+ int m_uncond_unspec;
+};
+
/* General SME unspec-based functions, parameterized on the vector mode. */
class sme_1mode_function : public read_write_za<unspec_based_function_base>
{
rtx
expand (function_expander &e) const override
{
- auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
+ insn_code icode;
+ if (e.mode_suffix_id == MODE_single)
+ icode = code_for_aarch64_sme_single (unspec_for (e), e.tuple_mode (1));
+ else
+ icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
return e.use_exact_insn (icode);
}
};
/* General SME unspec-based functions, parameterized on both the ZA mode
and the vector mode. */
-template<insn_code (*CODE) (int, machine_mode, machine_mode)>
+template<insn_code (*CODE) (int, machine_mode, machine_mode),
+ insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)>
class sme_2mode_function_t : public read_write_za<unspec_based_function_base>
{
public:
rtx
expand (function_expander &e) const override
{
- insn_code icode = CODE (unspec_for (e), e.vector_mode (0),
- e.tuple_mode (1));
+ insn_code icode;
+ if (e.mode_suffix_id == MODE_single)
+ icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0),
+ e.tuple_mode (1));
+ else
+ icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1));
return e.use_exact_insn (icode);
}
};
-using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme>;
+using sme_2mode_function
+ = sme_2mode_function_t<code_for_aarch64_sme, code_for_aarch64_sme_single>;
+
+using sme_2mode_lane_function
+ = sme_2mode_function_t<code_for_aarch64_sme_lane, nullptr>;
/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
when operating on integers, but that expands to an (fma ...)-style
int m_unspec;
};
+/* A function that implements an x2 or x4 permute instruction. Both forms
+ of the intrinsic have a single x2 or x4 tuple argument, but the underlying
+ x2 instruction takes two separate input operands. */
+class multireg_permute : public function_base
+{
+public:
+ CONSTEXPR multireg_permute (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code icode = code_for_aarch64_sve (m_unspec, e.tuple_mode (0));
+ if (e.group_suffix ().vectors_per_tuple == 2)
+ {
+ machine_mode elt_mode = e.vector_mode (0);
+ rtx arg = e.args[0];
+ e.args[0] = simplify_gen_subreg (elt_mode, arg, GET_MODE (arg), 0);
+ e.args.safe_push (simplify_gen_subreg (elt_mode, arg, GET_MODE (arg),
+ GET_MODE_SIZE (elt_mode)));
+ }
+ return e.use_exact_insn (icode);
+ }
+
+ /* The unspec associated with the permutation. */
+ int m_unspec;
+};
+
+/* A function that has two integer type suffixes, which might agree
+ or disagree on signedness. There are separate instructions for each
+ signed/unsigned combination. */
+class integer_conversion : public function_base
+{
+public:
+ CONSTEXPR integer_conversion (int unspec_for_sint, int unspec_for_sintu,
+ int unspec_for_uint, int unspec_for_uints)
+ : m_unspec_for_sint (unspec_for_sint),
+ m_unspec_for_sintu (unspec_for_sintu),
+ m_unspec_for_uint (unspec_for_uint),
+ m_unspec_for_uints (unspec_for_uints)
+ {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode0 = e.vector_mode (0);
+ machine_mode mode1 = GET_MODE (e.args[0]);
+ int unspec;
+ if (e.type_suffix (0).unsigned_p == e.type_suffix (1).unsigned_p)
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ else
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_sintu
+ : m_unspec_for_uints);
+ return e.use_exact_insn (code_for_aarch64_sve (unspec, mode0, mode1));
+ }
+
+ /* The unspec for signed -> signed. */
+ int m_unspec_for_sint;
+
+ /* The unspec for signed -> unsigned. */
+ int m_unspec_for_sintu;
+
+ /* The unspec for unsigned -> unsigned. */
+ int m_unspec_for_uint;
+
+ /* The unspec for unsigned -> signed. */
+ int m_unspec_for_uints;
+};
+
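+/* As an illustration (the unspec names here are placeholders rather than
+ names taken from this patch), an instantiation such as
+
+ integer_conversion (UNSPEC_A, UNSPEC_B, UNSPEC_C, UNSPEC_D)
+
+ would use UNSPEC_A for signed -> signed conversions, UNSPEC_B for
+ signed -> unsigned, UNSPEC_C for unsigned -> unsigned and UNSPEC_D for
+ unsigned -> signed, following the dispatch in expand above. */
+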
/* A function_base for functions that reduce a vector to a scalar. */
class reduction : public function_base
{
if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
{
e.args.last () = shift;
- return e.map_to_rtx_codes (m_code, m_code, -1);
+ return e.map_to_rtx_codes (m_code, m_code, -1, -1);
}
if (e.pred == PRED_x)
int unspec = (e.type_suffix (1).unsigned_p
? m_unspec_for_uint
: m_unspec_for_sint);
+ if (e.vectors_per_tuple () > 1)
+ {
+ auto bits = e.type_suffix (0).element_bits;
+ auto icode = code_for_aarch64_sve_while_b_x2 (unspec, bits);
+ return e.use_exact_insn (icode);
+ }
+ if (e.type_suffix (0).tclass == TYPE_count)
+ {
+ auto bits = e.type_suffix (0).element_bits;
+ auto icode = code_for_aarch64_sve_while_c (unspec, bits);
+ return e.use_exact_insn (icode);
+ }
+
machine_mode pred_mode = e.vector_mode (0);
scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST));
}
+/* GROUP's first type suffix is a ZA-related one. Return true if the
+ group exists only for the purpose of defining C overloads. This is
+ useful if some forms of an instruction require one feature and other
+ forms require another feature, and neither feature implies the other. */
+static bool
+za_group_is_pure_overload (const function_group_info &group)
+{
+ gcc_checking_assert (type_suffixes[group.types[0][0]].za_p);
+ return group.types[0][1] == NUM_TYPE_SUFFIXES;
+}
+
/* If INSTANCE has a governing predicate, add it to the list of argument
types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the
function. */
in the original format string. */
if (instance.pred != PRED_none && instance.pred != PRED_za_m)
{
- argument_types.quick_insert (0, get_svbool_t ());
+ argument_types.quick_insert (0, instance.gp_type ());
/* For unary merge operations, the first argument is a vector with
the same type as the result. For unary_convert_narrowt it also
provides the "bottom" half of active elements, and is present
f<bits> - a floating-point type with the given number of bits
f[01] - a floating-point type with the same width as type suffix 0 or 1
B - bfloat16_t
+ c - a predicate-as-counter
h<elt> - a half-sized version of <elt>
p - a predicate (represented as TYPE_SUFFIX_b)
q<elt> - a quarter-sized version of <elt>
return suffix;
}
+ if (ch == 'c')
+ return TYPE_SUFFIX_c;
+
if (ch == 'p')
return TYPE_SUFFIX_b;
ap - array pointer for prefetches
as - array pointer for stores
b - base vector type (from a _<m0>base suffix)
+ c0 - the result of a conversion, based on type and group suffixes
+ c1 - the source of a conversion, based on type and group suffixes
d - displacement vector type (from a _<m1>index or _<m1>offset suffix)
e<name> - an enum with the given name
s<elt> - a scalar type with the given element suffix
if (ch == 'b')
return instance.base_vector_type ();
+ if (ch == 'c')
+ {
+ int ch = *format++;
+ gcc_assert (ch == '0' || ch == '1');
+ unsigned int id = (ch == '0' ? 0 : 1);
+ auto vector_type = instance.type_suffix (id).vector_type;
+ unsigned int num_vectors = instance.group_suffix ().vectors_per_tuple;
+ if (num_vectors != 1)
+ {
+ unsigned int bits = instance.type_suffix (id).element_bits;
+ unsigned int other_bits = instance.type_suffix (1 - id).element_bits;
+ if (other_bits > bits)
+ num_vectors /= other_bits / bits;
+ }
+ return acle_vector_types[num_vectors - 1][vector_type];
+ }
+
if (ch == 'd')
return instance.displacement_vector_type ();
}
};
+/* Base class for shapes like binary_za_slice_lane. TCLASS is the type
+ class of the final vector argument. */
+template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_za_slice_lane_base : public overloaded_base<1>
+{
+ constexpr binary_za_slice_lane_base (unsigned int lane_type_suffix)
+ : m_lane_type_suffix (lane_type_suffix) {}
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,v1,su64", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (4)
+ || !r.require_scalar_type (0, "uint32_t")
+ || !(type = r.infer_tuple_type (1))
+ || !r.require_derived_vector_type (2, 1, type, TCLASS)
+ || !r.require_integer_immediate (3))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ unsigned int bytes = c.type_suffix (m_lane_type_suffix).element_bytes;
+ return c.require_immediate_range (3, 0, 16 / bytes - 1);
+ }
+
+ unsigned int m_lane_type_suffix;
+};
+
+/* Base class for shapes like binary_za_slice_opt_single. TCLASS is the
+ type class of the final argument. */
+template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_za_slice_opt_single_base : public overloaded_base<1>
+{
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (3)
+ || !r.require_scalar_type (0, "uint32_t")
+ || !(type = r.infer_tuple_type (1)))
+ return error_mark_node;
+
+ return r.finish_opt_single_resolution (2, 1, type, TCLASS);
+ }
+};
+
/* Base class for inc_dec and inc_dec_pat. */
struct inc_dec_base : public overloaded_base<0>
{
|| (vnum_p && !r.require_scalar_type (i + 1, "int64_t")))
return error_mark_node;
- return r.resolve_to (r.mode_suffix_id, type);
+ return r.resolve_to (r.mode_suffix_id, type, NUM_TYPE_SUFFIXES,
+ r.group_suffix_id);
}
};
}
};
+/* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t)
+
+ where the first argument is the ZT register number (currently always 0)
+ and the final argument is a constant index. The instruction divides
+ the vector argument into BITS-bit quantities. */
+template<unsigned int BITS>
+struct luti_lane_zt_base : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "t0,su64,vu8,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ auto nvectors = c.vectors_per_tuple ();
+ return (c.require_immediate_range (0, 0, 0)
+ && c.require_immediate_range (2, 0, 32 / BITS / nvectors - 1));
+ }
+};
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t,
sv<t0:quarter>_t) (for integer t0)
sv<t0>_t svmmla[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t) (for floating-point t0)
};
SHAPE (binary_int_opt_n)
+/* Like binary_int_opt_n for single vectors. For tuples:
+
+ sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0:int>x<g>_t)
+ sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0:int>_t). */
+struct binary_int_opt_single_n_def : public overloaded_base<0>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t0,t0,ts0", group, MODE_none);
+ if (group.groups[0] == GROUP_none)
+ build_all (b, "v0,v0,ss0", group, MODE_n);
+ else
+ build_all (b, "t0,t0,vs0", group, MODE_single);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ sve_type type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || !(type = r.infer_sve_type (i)))
+ return error_mark_node;
+
+ return (type.num_vectors == 1 && r.scalar_argument_p (i + 1)
+ ? r.finish_opt_n_resolution (i + 1, i, type.type, TYPE_signed)
+ : r.finish_opt_single_resolution (i + 1, i, type, TYPE_signed));
+ }
+};
+SHAPE (binary_int_opt_single_n)
+
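+/* For instance, with t0 = u8 and g = x2, the tuple forms above resolve to
+ prototypes of the form (an illustrative instantiation, not an exhaustive
+ list):
+
+ svuint8x2_t svfoo[_u8_x2](svuint8x2_t, svint8x2_t)
+ svuint8x2_t svfoo[_single_u8_x2](svuint8x2_t, svint8_t). */
+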
/* sv<t0>_t svfoo_<t0>(sv<t0>_t, sv<t0>_t, uint64_t)
where the final argument is an integer constant expression in the
};
SHAPE (binary_opt_n)
+/* Like binary_opt_n for single vectors. For tuples:
+
+ sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0>x<g>_t)
+ sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t). */
+struct binary_opt_single_n_def : public overloaded_base<0>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t0,t0,t0", group, MODE_none);
+ if (group.groups[0] == GROUP_none)
+ build_all (b, "v0,v0,s0", group, MODE_n);
+ else
+ build_all (b, "t0,t0,v0", group, MODE_single);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ sve_type type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || !(type = r.infer_sve_type (i)))
+ return error_mark_node;
+
+ return (type.num_vectors == 1 && r.scalar_argument_p (i + 1)
+ ? r.finish_opt_n_resolution (i + 1, i, type.type)
+ : r.finish_opt_single_resolution (i + 1, i, type));
+ }
+};
+SHAPE (binary_opt_single_n)
+
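+/* As a worked example, assuming the usual ACLE naming for the multi-vector
+ forms of svmin (an assumption for illustration, not something established
+ by this shape alone), the x2 overloads would be:
+
+ svfloat32x2_t svmin[_f32_x2](svfloat32x2_t, svfloat32x2_t)
+ svfloat32x2_t svmin[_single_f32_x2](svfloat32x2_t, svfloat32_t)
+
+ while the single-vector forms keep the existing vector and _n overloads. */
+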
/* svbool_t svfoo(svbool_t, svbool_t). */
struct binary_pred_def : public nonoverloaded_base
{
};
SHAPE (binary_scalar)
+/* sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t). */
+struct binary_single_def : public overloaded_base<0>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t0,t0,v0", group, MODE_single);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (2)
+ || !(type = r.infer_sve_type (0))
+ || !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS,
+ r.SAME_SIZE, 1))
+ return error_mark_node;
+
+ return r.resolve_to (MODE_single, type);
+ }
+};
+SHAPE (binary_single)
+
/* sv<t0:uint>_t svfoo[_t0](sv<t0>_t, sv<t0>_t).
i.e. a version of "binary" that returns unsigned integers. */
};
SHAPE (binary_za_m)
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t)
+
+ where the first argument is a variable ZA slice and the final argument
+ indexes a single element in the preceding vector argument. */
+struct binary_za_slice_lane_def : public binary_za_slice_lane_base<>
+{
+ constexpr binary_za_slice_lane_def () : binary_za_slice_lane_base<> (1) {}
+};
+SHAPE (binary_za_slice_lane)
+
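+/* For example, with t0 = za32, t1 = f32 and g = vg1x2 the prototype above
+ becomes (an illustrative instantiation):
+
+ void svfoo_lane_za32[_f32]_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t,
+ uint64_t)
+
+ with the lane index limited to [0, 16 / 4 - 1], i.e. [0, 3]. */
+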
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>x<g>_t)
+ void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t).
+
+ where the first argument is a variable ZA slice. */
+struct binary_za_slice_int_opt_single_def
+ : public binary_za_slice_opt_single_base<TYPE_signed>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,ts1", group, MODE_none);
+ build_all (b, "_,su32,t1,vs1", group, MODE_single);
+ }
+};
+SHAPE (binary_za_slice_int_opt_single)
+
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>x<g>_t)
+ void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t)
+
+ where the first argument is a variable ZA slice. */
+struct binary_za_slice_opt_single_def
+ : public binary_za_slice_opt_single_base<>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,t1", group, MODE_none);
+ build_all (b, "_,su32,t1,v1", group, MODE_single);
+ }
+};
+SHAPE (binary_za_slice_opt_single)
+
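+/* For example, with t0 = za32, t1 = s32 and g = vg1x2, the prototypes
+ above become (an illustrative instantiation):
+
+ void svfoo_za32[_s32]_vg1x2(uint32_t, svint32x2_t, svint32x2_t)
+ void svfoo[_single]_za32[_s32]_vg1x2(uint32_t, svint32x2_t, svint32_t). */
+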
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>x<g>_t)
+ void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t)
+
+ where the first argument is a variable ZA slice. */
+struct binary_za_slice_uint_opt_single_def
+ : public binary_za_slice_opt_single_base<TYPE_unsigned>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,tu1", group, MODE_none);
+ build_all (b, "_,su32,t1,vu1", group, MODE_single);
+ }
+};
+SHAPE (binary_za_slice_uint_opt_single)
+
/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
sv<t1:uint>x<g>_t)
};
SHAPE (binary_za_uint_m)
+/* sv<t0>x<g>_t svfoo[_t0_t1_g](sv<t0>x<g>_t, sv<t0>x<g>_t). */
+struct binaryxn_def : public overloaded_base<0>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t0,t0,t0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ vector_type_index pred_type;
+ sve_type type;
+ if (!r.check_num_arguments (3)
+ || (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES
+ || !(type = r.infer_sve_type (1))
+ || !r.require_matching_predicate_type (pred_type, type)
+ || !r.require_matching_vector_type (2, 1, type))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (binaryxn)
+
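+/* svsel is defined with this shape above. Assuming that its tuple forms
+ take an svcount_t governing predicate (an assumption here, inferred from
+ the resolver rather than stated by the shape), an overloaded x2 call
+ would look like:
+
+ svuint8x2_t svsel(svcount_t, svuint8x2_t, svuint8x2_t). */
+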
/* bool svfoo(). */
struct bool_inherent_def : public nonoverloaded_base
{
};
SHAPE (bool_inherent)
+/* Either:
+
+ sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t)
+
+ for single vectors or:
+
+ sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t, sv<t0>_t)
+
+ for tuples. */
+struct clamp_def : public overloaded_base<0>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t0,t0,v0,v0", group,
+ group.groups[0] == GROUP_none ? MODE_none : MODE_single);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (3)
+ || !(type = r.infer_sve_type (0))
+ || !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS,
+ r.SAME_SIZE, 1)
+ || !r.require_derived_vector_type (2, 0, type, r.SAME_TYPE_CLASS,
+ r.SAME_SIZE, 1))
+ return error_mark_node;
+
+ auto mode = type.num_vectors == 1 ? MODE_none : MODE_single;
+ return r.resolve_to (mode, type);
+ }
+};
+SHAPE (clamp)
+
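+/* Assuming svclamp is a user of this shape (an assumption made for
+ illustration), the two cases would resolve to, for example:
+
+ svint16_t svclamp[_s16](svint16_t, svint16_t, svint16_t)
+ svint16x4_t svclamp[_single_s16_x4](svint16x4_t, svint16_t, svint16_t). */
+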
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
<t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t). */
struct clast_def : public overloaded_base<0>
};
SHAPE (compare_ptr)
-/* svbool_t svfoo_t0[_t1](<t1>_t, <t1>_t)
+/* svboolx<g>_t svfoo_t0[_t1]_g(<t1>_t, <t1>_t)
where _t0 is a _b<bits> suffix that describes the predicate result.
There is no direct relationship between the element sizes of _t0
build (function_builder &b, const function_group_info &group) const override
{
b.add_overloaded_functions (group, MODE_none);
- build_all (b, "vp,s1,s1", group, MODE_none);
+ build_all (b, "tp,s1,s1", group, MODE_none);
}
tree
|| !r.require_matching_integer_scalar_type (i + 1, i, type))
return error_mark_node;
- return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type,
+ r.group_suffix_id);
}
};
SHAPE (compare_scalar)
+/* svcount_t svfoo_t0[_t1](<t1>_t, <t1>_t, uint64_t)
+
+ where _t0 is a _c<bits> suffix that describes the predicate-as-counter
+ result. The final argument is an integer constant that specifies the
+ number of vectors (2 or 4). */
+struct compare_scalar_count_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,s1,s1,su64", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_64bit_scalar_integer_pair (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_integer_immediate (i + 2))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_either_or (2, 2, 4);
+ }
+};
+SHAPE (compare_scalar_count)
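+/* For example, with t0 = c8 and t1 = s64 this gives (an illustrative
+ instantiation; the svwhile* comparisons are the expected users, although
+ that is an assumption here):
+
+ svcount_t svfoo_c8[_s64](int64_t, int64_t, uint64_t)
+
+ with the final argument restricted to 2 or 4 by check above. */
+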
+
/* svbool_t svfoo[_t0](sv<t0>_t, svint64_t) (for signed t0)
svbool_t svfoo[_n_t0](sv<t0>_t, int64_t) (for signed t0)
svbool_t svfoo[_t0](sv<t0>_t, svuint64_t) (for unsigned t0)
};
SHAPE (count_pred)
+/* uint64_t svfoo_t0(sv<t0>_t, uint64_t)
+
+ where the final argument must be 2 or 4. */
+struct count_pred_c_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "su64,v0,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_either_or (1, 2, 4);
+ }
+};
+SHAPE (count_pred_c)
+
/* uint64_t svfoo[_t0](sv<t0>_t). */
struct count_vector_def : public overloaded_base<0>
{
};
SHAPE (create)
-/* sv<t0>_t svfoo[_n]_t0(<t0>_t, ..., <t0>_t)
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t, uint64_t)
- where there are enough arguments to fill 128 bits of data (or to
- control 128 bits of data in the case of predicates). */
-struct dupq_def : public overloaded_base<1>
+ where the final argument indexes a <t0>-sized group of elements in the
+ preceding vector argument. */
+struct dot_za_slice_int_lane_def
+ : public binary_za_slice_lane_base<TYPE_signed>
{
+ constexpr dot_za_slice_int_lane_def ()
+ : binary_za_slice_lane_base<TYPE_signed> (0) {}
+
void
build (function_builder &b, const function_group_info &group) const override
{
- /* The "_n" suffix is optional; the full name has it, but the short
- name doesn't. */
- build_all (b, "v0,s0*q", group, MODE_n, true);
- }
-
- tree
- resolve (function_resolver &) const override
- {
- /* The short forms just make "_n" implicit, so no resolution is needed. */
- gcc_unreachable ();
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,vs1,su64", group, MODE_none);
}
};
-SHAPE (dupq)
+SHAPE (dot_za_slice_int_lane)
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, uint64_t)
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t)
- where the final argument is an integer constant expression that when
+ where the final argument indexes a <t0>-sized group of elements in the
+ preceding vector argument. */
+struct dot_za_slice_lane_def : public binary_za_slice_lane_base<>
+{
+ constexpr dot_za_slice_lane_def () : binary_za_slice_lane_base<> (0) {}
+};
+SHAPE (dot_za_slice_lane)
+
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t, uint64_t)
+
+ where the final argument indexes a <t0>-sized group of elements in the
+ preceding vector argument. */
+struct dot_za_slice_uint_lane_def
+ : public binary_za_slice_lane_base<TYPE_unsigned>
+{
+ constexpr dot_za_slice_uint_lane_def ()
+ : binary_za_slice_lane_base<TYPE_unsigned> (0) {}
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1,vu1,su64", group, MODE_none);
+ }
+};
+SHAPE (dot_za_slice_uint_lane)
+
+/* sv<t0>_t svfoo[_n]_t0(<t0>_t, ..., <t0>_t)
+
+ where there are enough arguments to fill 128 bits of data (or to
+ control 128 bits of data in the case of predicates). */
+struct dupq_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ /* The "_n" suffix is optional; the full name has it, but the short
+ name doesn't. */
+ build_all (b, "v0,s0*q", group, MODE_n, true);
+ }
+
+ tree
+ resolve (function_resolver &) const override
+ {
+ /* The short forms just make "_n" implicit, so no resolution is needed. */
+ gcc_unreachable ();
+ }
+};
+SHAPE (dupq)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, uint64_t)
+
+ where the final argument is an integer constant expression that when
multiplied by the number of bytes in t0 is in the range [0, 255]. */
struct ext_def : public overloaded_base<0>
{
};
SHAPE (ext)
+/* svboolx<g>_t svfoo_t0_g(svcount_t, uint64_t)
+
+ where the final argument is an integer constant expression in the
+ range [0, 4 / <g> - 1]. */
+struct extract_pred_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "tp,vc,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ unsigned int size = c.vectors_per_tuple ();
+ return c.require_immediate_range (1, 0, 4 / size - 1);
+ }
+};
+SHAPE (extract_pred)
+
/* <t0>_t svfoo[_t0](<t0>_t, sv<t0>_t). */
struct fold_left_def : public overloaded_base<0>
{
};
SHAPE (inherent_za)
+/* void svfoo_zt(uint64_t)
+
+ where the argument must be zero. */
+struct inherent_zt_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, 0);
+ }
+};
+SHAPE (inherent_zt)
+
/* void svfoo_t0(uint64_t)
where the argument is an integer constant that specifies an 8-bit mask. */
};
SHAPE (ldr_za)
-/* sv<t0>[xN]_t svfoo[_t0](const <t0>_t *)
- sv<t0>[xN]_t svfoo_vnum[_t0](const <t0>_t *, int64_t). */
+/* void svfoo_zt(uint64_t, const void *)
+
+ where the first argument must be zero. */
+struct ldr_zt_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64,al", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, 0);
+ }
+};
+SHAPE (ldr_zt)
+
+/* sv<t0>[xN]_t svfoo[_t0]_g(const <t0>_t *)
+ sv<t0>[xN]_t svfoo_vnum[_t0]_g(const <t0>_t *, int64_t). */
struct load_def : public load_contiguous_base
{
void
};
SHAPE (load_za)
+using luti2_lane_zt_def = luti_lane_zt_base<2>;
+SHAPE (luti2_lane_zt)
+
+using luti4_lane_zt_def = luti_lane_zt_base<4>;
+SHAPE (luti4_lane_zt)
+
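+/* For example, with t0 = u8 and a vector group of x2, svluti2_lane_zt
+ (defined with this shape elsewhere in the series) has the form
+ (an illustrative instantiation):
+
+ svuint8x2_t svluti2_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t)
+
+ where the ZT register number must be 0 and, per the check above, the
+ index must be in the range [0, 32 / 2 / 2 - 1], i.e. [0, 7]. */
+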
/* svbool_t svfoo(enum svpattern). */
struct pattern_pred_def : public nonoverloaded_base
{
};
SHAPE (rdffr)
+/* sv<t1>x<g>_t svfoo_t0_t1_g(uint64_t, uint32_t). */
+struct read_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "t1,su64,su32", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (read_za)
+
/* sv<t1>_t svfoo_t0[_t1](uint64_t, uint32_t)
where the first two fields form a (ZA tile, slice) pair. */
};
SHAPE (read_za_m)
+/* sv<t1>x<g>_t svfoo_t0_t1_g(uint32_t). */
+struct read_za_slice_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "t1,su32", group, MODE_none);
+ }
+};
+SHAPE (read_za_slice)
+
/* <t0>_t svfoo[_t0](sv<t0>_t). */
struct reduction_def : public overloaded_base<0>
{
};
SHAPE (reinterpret)
+/* sv<t0>_t svfoo_t0(sv<t0>_t, svbool_t, uint32_t). */
+struct select_pred_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "v0,v0,vp,su32", group, MODE_none);
+ }
+};
+SHAPE (select_pred)
+
/* sv<t0>xN_t svfoo[_t0](sv<t0>xN_t, uint64_t, sv<t0>_t)
where the second argument is an integer constant expression in the
shift_right_imm_narrowt_to_uint_def;
SHAPE (shift_right_imm_narrowt_to_uint)
+/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 8]. */
+struct shift_right_imm_narrowxn_def : public overloaded_base<1>
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ build_all (b, "c0,c1,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (2)
+ || !(type = r.infer_sve_type (0))
+ || !r.require_integer_immediate (1))
+ return error_mark_node;
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ unsigned int suffix = c.group_suffix_id == GROUP_x4 ? 1 : 0;
+ unsigned int bits = c.type_suffix (suffix).element_bits;
+ return c.require_immediate_range (1, 1, bits);
+ }
+};
+SHAPE (shift_right_imm_narrowxn)
+
/* void svfoo[_t0](<X>_t *, sv<t0>[xN]_t)
void svfoo_vnum[_t0](<X>_t *, int64_t, sv<t0>[xN]_t)
};
SHAPE (store_za)
+/* void svfoo[_t0_g](<X>_t *, sv<t0>x<g>_t)
+ void svfoo_vnum[_t0_g](<X>_t *, int64_t, sv<t0>x<g>_t)
+
+ where <X> might be tied to <t0> (for non-truncating stores) or might
+ depend on the function base name (for truncating stores). */
+struct storexn_def : public store_def
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ bool vnum_p = r.mode_suffix_id == MODE_vnum;
+ gcc_assert (r.mode_suffix_id == MODE_none || vnum_p);
+
+ unsigned int nargs = vnum_p ? 4 : 3;
+ vector_type_index pred_type;
+ sve_type type;
+ if (!r.check_num_arguments (nargs)
+ || (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES
+ || !r.require_pointer_type (1)
+ || (vnum_p && !r.require_scalar_type (2, "int64_t"))
+ || !(type = r.infer_sve_type (nargs - 1))
+ || !r.require_matching_predicate_type (pred_type, type))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (storexn)
+
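+/* svst1 and svstnt1 use this shape (see the .def changes above). Assuming
+ the tuple forms take an svcount_t governing predicate (an assumption for
+ illustration), an x2 instantiation would be:
+
+ void svst1[_u8_x2](svcount_t, uint8_t *, svuint8x2_t)
+ void svst1_vnum[_u8_x2](svcount_t, uint8_t *, int64_t, svuint8x2_t). */
+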
/* void svfoo_t0(uint32_t, void *)
void svfoo_vnum_t0(uint32_t, void *, int64_t)
};
SHAPE (str_za)
+/* void svfoo_zt(uint64_t, void *)
+
+ where the first argument must be zero. */
+struct str_zt_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64,as", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, 0);
+ }
+};
+SHAPE (str_zt)
+
/* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t). */
struct tbl_tuple_def : public overloaded_base<0>
{
};
SHAPE (ternary_opt_n)
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t)
+/* A choice between:
+
+ (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
+ uint64_t)
+
+ (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t, uint64_t)
where the final argument is an integer constant expression in the range
[0, 16 / sizeof (<t0>_t) - 1]. */
-struct ternary_qq_lane_def : public ternary_qq_lane_base<>
+struct ternary_qq_or_011_lane_def : public ternary_qq_lane_base<>
{
void
build (function_builder &b, const function_group_info &group) const override
{
b.add_overloaded_functions (group, MODE_none);
- build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
+ if (group.types[0][1] == NUM_TYPE_SUFFIXES)
+ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
+ else
+ build_all (b, "v0,v0,v1,v1,su64", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type0, type1;
+ if (!r.check_gp_argument (4, i, nargs)
+ || (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+ || !r.require_matching_vector_type (i + 2, i + 1, type1)
+ || !r.require_integer_immediate (i + 3))
+ return error_mark_node;
+
+ if ((type_suffixes[type0].element_bits
+ == 4 * type_suffixes[type1].element_bits)
+ && type_suffixes[type0].tclass == type_suffixes[type1].tclass)
+ if (tree res = r.lookup_form (MODE_none, type0))
+ return res;
+
+ return r.resolve_to (r.mode_suffix_id, type0, type1);
}
};
-SHAPE (ternary_qq_lane)
+SHAPE (ternary_qq_or_011_lane)
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
uint64_t)
};
SHAPE (ternary_qq_lane_rotate)
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
- sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
+/* A choice between:
- i.e. a version of the standard ternary shape ternary_opt_n in which
- the element type of the last two arguments is the quarter-sized
- equivalent of <t0>. */
-struct ternary_qq_opt_n_def
+ (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
+
+ i.e. a version of the standard ternary shape ternary_opt_n in which
+ the element type of the last two arguments is the quarter-sized
+ equivalent of <t0>.
+
+ (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t)
+
+ where the element type of the last two arguments is specified
+ explicitly. */
+struct ternary_qq_opt_n_or_011_def
: public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE>
{
void
build (function_builder &b, const function_group_info &group) const override
{
b.add_overloaded_functions (group, MODE_none);
- build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
- build_all (b, "v0,v0,vq0,sq0", group, MODE_n);
+ if (group.types[0][1] == NUM_TYPE_SUFFIXES)
+ {
+ build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
+ build_all (b, "v0,v0,vq0,sq0", group, MODE_n);
+ }
+ else
+ build_all (b, "v0,v0,v1,v1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type0, type1;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+ || !r.require_vector_or_scalar_type (i + 2))
+ return error_mark_node;
+
+ auto mode = r.scalar_argument_p (i + 2) ? MODE_n : MODE_none;
+ if (mode == MODE_none
+ && !r.require_matching_vector_type (i + 2, i + 1, type1))
+ return error_mark_node;
+
+ if ((type_suffixes[type0].element_bits
+ == 4 * type_suffixes[type1].element_bits)
+ && type_suffixes[type0].tclass == type_suffixes[type1].tclass)
+ if (tree res = r.lookup_form (mode, type0))
+ return res;
+
+ if (!r.require_nonscalar_type (i + 2))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type0, type1);
}
};
-SHAPE (ternary_qq_opt_n)
+SHAPE (ternary_qq_opt_n_or_011)
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
uint64_t)
build (function_builder &b, const function_group_info &group) const override
{
b.add_overloaded_functions (group, MODE_none);
- build_all (b, "v0,v0", group, MODE_none);
+ build_all (b, "t0,t0", group, MODE_none);
}
tree
build (function_builder &b, const function_group_info &group) const override
{
b.add_overloaded_functions (group, MODE_none);
- build_all (b, "v0,v1", group, MODE_none);
+ build_all (b, "c0,c1", group, MODE_none);
}
tree
};
SHAPE (unary_convert_narrowt)
+/* sv<t0>x<g0>_t svfoo_t0[_t1_g](sv<t1>x<g1>_t)
+
+ where the target type <t0> must be specified explicitly but the
+ source type <t1> can be inferred.
+
+ Functions with a group suffix are unpredicated. For them:
+
+ - If <t0> is N times wider than <t1>, the return value has N times
+ more vectors than the argument.
+
+ - If <t1> is N times wider than <t0>, the argument has N times
+ more vectors than the return type. */
+struct unary_convertxn_def : public unary_convert_def
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ if (r.pred != PRED_none)
+ return unary_convert_def::resolve (r);
+
+ sve_type type;
+ if (!r.check_num_arguments (1)
+ || !(type = r.infer_sve_type (0)))
+ return error_mark_node;
+
+ return r.resolve_conversion (r.mode_suffix_id, type);
+ }
+};
+SHAPE (unary_convertxn)
+
/* sv<t0>_t svfoo[_t0](sv<t0:half>_t). */
struct unary_long_def : public overloaded_base<0>
{
};
SHAPE (unary_za_m)
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t). */
+struct unary_za_slice_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ if (!za_group_is_pure_overload (group))
+ build_all (b, "_,su32,t1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (2)
+ || !r.require_scalar_type (0, "uint32_t")
+ || !(type = r.infer_tuple_type (1)))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (unary_za_slice)
+
+/* sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t). */
+struct unaryxn_def : public unary_def
+{
+ bool explicit_group_suffix_p () const override { return false; }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ if (r.pred != PRED_none)
+ return unary_def::resolve (r);
+
+ sve_type type;
+ if (!r.check_num_arguments (1)
+ || !(type = r.infer_sve_type (0)))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (unaryxn)
+
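+/* The svrint* rounding functions are moved to this shape above. Assuming
+ an f32 x2 form exists (an assumption for illustration), the unpredicated
+ tuple overload would be:
+
+ svfloat32x2_t svrinta[_f32_x2](svfloat32x2_t)
+
+ while the predicated single-vector forms resolve as before. */
+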
+/* void svfoo_t0[_t1]_g(uint64_t, uint32_t, sv<t1>x<g>_t). */
+struct write_za_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su64,su32,t1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (3)
+ || !r.require_integer_immediate (0)
+ || !r.require_scalar_type (1, "uint32_t")
+ || !(type = r.infer_tuple_type (2)))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (write_za)
+
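+/* For example, with t0 = za8, t1 = u8 and g = vg2, the prototype above
+ becomes (an illustrative instantiation):
+
+ void svfoo_za8[_u8]_vg2(uint64_t, uint32_t, svuint8x2_t)
+
+ with the tile number checked against num_za_tiles () as above. */
+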
/* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv<t1>_t)
where the first two fields form a (ZA tile, slice) pair. */
};
SHAPE (write_za_m)
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t). */
+struct write_za_slice_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su32,t1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (2)
+ || !r.require_scalar_type (0, "uint32_t")
+ || !(type = r.infer_tuple_type (1)))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (write_za_slice)
+
}
one in which the argument is the usual vector, and one in which it
is replaced by a scalar.
+ - an "_opt_single" suffix similarly says that the function can take
+ a vector or tuple argument, with the former having a "_single" suffix.
+
- "_int" and "_uint" replace the argument's element type with a
signed or unsigned integer of the same width. The suffixes above
then indicate whether this final argument is or might be a scalar.
extern const function_shape *const adr_offset;
extern const function_shape *const binary;
extern const function_shape *const binary_int_opt_n;
+ extern const function_shape *const binary_int_opt_single_n;
extern const function_shape *const binary_lane;
extern const function_shape *const binary_long_lane;
extern const function_shape *const binary_long_opt_n;
extern const function_shape *const binary_narrowb_opt_n;
extern const function_shape *const binary_narrowt_opt_n;
extern const function_shape *const binary_opt_n;
+ extern const function_shape *const binary_opt_single_n;
extern const function_shape *const binary_pred;
extern const function_shape *const binary_rotate;
extern const function_shape *const binary_scalar;
+ extern const function_shape *const binary_single;
extern const function_shape *const binary_to_uint;
extern const function_shape *const binary_uint;
extern const function_shape *const binary_uint_n;
extern const function_shape *const binary_wide_opt_n;
extern const function_shape *const binary_za_int_m;
extern const function_shape *const binary_za_m;
+ extern const function_shape *const binary_za_slice_lane;
+ extern const function_shape *const binary_za_slice_int_opt_single;
+ extern const function_shape *const binary_za_slice_opt_single;
+ extern const function_shape *const binary_za_slice_uint_opt_single;
extern const function_shape *const binary_za_uint_m;
+ extern const function_shape *const binaryxn;
extern const function_shape *const bool_inherent;
+ extern const function_shape *const clamp;
extern const function_shape *const clast;
extern const function_shape *const compare;
extern const function_shape *const compare_opt_n;
extern const function_shape *const compare_ptr;
extern const function_shape *const compare_scalar;
+ extern const function_shape *const compare_scalar_count;
extern const function_shape *const compare_wide_opt_n;
extern const function_shape *const count_inherent;
extern const function_shape *const count_pat;
extern const function_shape *const count_pred;
+ extern const function_shape *const count_pred_c;
extern const function_shape *const count_vector;
extern const function_shape *const create;
+ extern const function_shape *const dot_za_slice_int_lane;
+ extern const function_shape *const dot_za_slice_lane;
+ extern const function_shape *const dot_za_slice_uint_lane;
extern const function_shape *const dupq;
extern const function_shape *const ext;
+ extern const function_shape *const extract_pred;
extern const function_shape *const fold_left;
extern const function_shape *const get;
extern const function_shape *const inc_dec;
extern const function_shape *const inherent;
extern const function_shape *const inherent_b;
extern const function_shape *const inherent_za;
+ extern const function_shape *const inherent_zt;
extern const function_shape *const inherent_mask_za;
+ extern const function_shape *const ldr_zt;
extern const function_shape *const ldr_za;
extern const function_shape *const load;
extern const function_shape *const load_ext;
extern const function_shape *const load_gather_vs;
extern const function_shape *const load_replicate;
extern const function_shape *const load_za;
+ extern const function_shape *const luti2_lane_zt;
+ extern const function_shape *const luti4_lane_zt;
extern const function_shape *const mmla;
extern const function_shape *const pattern_pred;
extern const function_shape *const prefetch;
extern const function_shape *const prefetch_gather_offset;
extern const function_shape *const ptest;
extern const function_shape *const rdffr;
+ extern const function_shape *const read_za;
extern const function_shape *const read_za_m;
+ extern const function_shape *const read_za_slice;
extern const function_shape *const reduction;
extern const function_shape *const reduction_wide;
extern const function_shape *const reinterpret;
+ extern const function_shape *const select_pred;
extern const function_shape *const set;
extern const function_shape *const setffr;
extern const function_shape *const shift_left_imm_long;
extern const function_shape *const shift_right_imm;
extern const function_shape *const shift_right_imm_narrowb;
extern const function_shape *const shift_right_imm_narrowt;
+ extern const function_shape *const shift_right_imm_narrowxn;
extern const function_shape *const shift_right_imm_narrowb_to_uint;
extern const function_shape *const shift_right_imm_narrowt_to_uint;
extern const function_shape *const store;
extern const function_shape *const store_scatter_offset;
extern const function_shape *const store_scatter_offset_restricted;
extern const function_shape *const store_za;
+ extern const function_shape *const storexn;
extern const function_shape *const str_za;
+ extern const function_shape *const str_zt;
extern const function_shape *const tbl_tuple;
extern const function_shape *const ternary_bfloat;
extern const function_shape *const ternary_bfloat_lane;
extern const function_shape *const ternary_long_lane;
extern const function_shape *const ternary_long_opt_n;
extern const function_shape *const ternary_opt_n;
- extern const function_shape *const ternary_qq_lane;
+ extern const function_shape *const ternary_qq_or_011_lane;
extern const function_shape *const ternary_qq_lane_rotate;
- extern const function_shape *const ternary_qq_opt_n;
+ extern const function_shape *const ternary_qq_opt_n_or_011;
extern const function_shape *const ternary_qq_rotate;
extern const function_shape *const ternary_rotate;
extern const function_shape *const ternary_shift_left_imm;
extern const function_shape *const unary;
extern const function_shape *const unary_convert;
extern const function_shape *const unary_convert_narrowt;
+ extern const function_shape *const unary_convertxn;
extern const function_shape *const unary_long;
extern const function_shape *const unary_n;
extern const function_shape *const unary_narrowb;
extern const function_shape *const unary_uint;
extern const function_shape *const unary_widen;
extern const function_shape *const unary_za_m;
+ extern const function_shape *const unary_za_slice;
+ extern const function_shape *const unaryxn;
+ extern const function_shape *const write_za;
extern const function_shape *const write_za_m;
+ extern const function_shape *const write_za_slice;
}
}
namespace {
-class load_store_za_base : public function_base
+class load_store_za_zt0_base : public function_base
{
public:
tree
expand (function_expander &e) const override
{
auto za_mode = e.vector_mode (0);
- auto z_mode = e.vector_mode (1);
+ auto z_mode = e.tuple_mode (1);
auto icode = (za_mode == VNx1TImode
? code_for_aarch64_sme (m_unspec, za_mode, z_mode)
: code_for_aarch64_sme (m_unspec, z_mode, z_mode));
int m_unspec;
};
-using load_za_base = add_call_properties<load_store_za_base,
+using load_za_base = add_call_properties<load_store_za_zt0_base,
CP_READ_MEMORY | CP_READ_ZA
| CP_WRITE_ZA>;
-using store_za_base = add_call_properties<load_store_za_base,
+using store_za_base = add_call_properties<load_store_za_zt0_base,
CP_WRITE_MEMORY | CP_READ_ZA>;
/* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
}
}
+/* Use instruction ICODE to expand ZT0 load or store E. */
+
+static rtx
+expand_ldr_str_zt0 (function_expander &e, insn_code icode)
+{
+ rtx base = e.convert_to_pmode (e.args[1]);
+ rtx mem = gen_rtx_MEM (V8DImode, force_reg (Pmode, base));
+ e.add_fixed_operand (mem);
+ return e.generate_insn (icode);
+}
+
/* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec.
IS_LOAD is true if E is a load, false if it is a store. */
}
};
+class svldr_zt_impl : public load_store_za_zt0_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_READ_MEMORY | CP_WRITE_ZT0;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_ldr_zt0);
+ }
+};
+
+class svluti_lane_zt_impl : public read_zt0<function_base>
+{
+public:
+ CONSTEXPR svluti_lane_zt_impl (unsigned int bits) : m_bits (bits) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto mode = e.tuple_mode (0);
+ e.args.ordered_remove (0);
+ return e.use_exact_insn (code_for_aarch64_sme_lut (m_bits, mode));
+ }
+
+ unsigned int m_bits;
+};
+
+class svread_za_impl : public function_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_READ_ZA;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
+ return e.use_exact_insn (code_for_aarch64_sme_read (mode));
+ }
+};
+
using svread_za_tile_impl = add_call_properties<read_write_za_base,
CP_READ_ZA>;
}
};
+class svstr_zt_impl : public load_store_za_zt0_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY | CP_READ_ZT0;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_str_zt0);
+ }
+};
+
+class svsudot_za_impl : public read_write_za<function_base>
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (e.mode_suffix_id == MODE_single)
+ {
+ auto icode = code_for_aarch64_sme_single_sudot (e.vector_mode (0),
+ e.tuple_mode (1));
+ return e.use_exact_insn (icode);
+ }
+ std::swap (e.args[1], e.args[2]);
+ return e.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT,
+ e.vector_mode (0),
+ e.tuple_mode (1)));
+ }
+};
+
class svundef_za_impl : public write_za<function_base>
{
public:
}
};
+class svwrite_za_impl : public function_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_ZA;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
+ e.args[1] = lowpart_subreg (mode, e.args[1], e.tuple_mode (1));
+ return e.use_exact_insn (code_for_aarch64_sme_write (mode));
+ }
+};
+
using svwrite_za_tile_impl = add_call_properties<read_write_za_base,
CP_READ_ZA | CP_WRITE_ZA>;
}
};
+class svzero_zt_impl : public write_zt0<function_base>
+{
+public:
+ rtx
+ expand (function_expander &) const override
+ {
+ emit_insn (gen_aarch64_sme_zero_zt0 ());
+ return const0_rtx;
+ }
+};
+
} /* end anonymous namespace */
namespace aarch64_sve {
FUNCTION (arm_has_sme, arm_has_sme_impl, )
FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, )
+FUNCTION (svadd_za, sme_1mode_function, (UNSPEC_SME_ADD, UNSPEC_SME_ADD,
+ UNSPEC_SME_FADD))
+FUNCTION (svadd_write_za, sme_1mode_function, (UNSPEC_SME_ADD_WRITE,
+ UNSPEC_SME_ADD_WRITE, -1))
FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA,
UNSPEC_SME_ADDHA, -1))
FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA,
UNSPEC_SME_ADDVA, -1))
+FUNCTION (svbmopa_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPA, -1))
+FUNCTION (svbmops_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPS, -1))
FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode))
FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode))
FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode))
FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode))
+FUNCTION (svdot_za, sme_2mode_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT,
+ UNSPEC_SME_FDOT))
+FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT,
+ UNSPEC_SME_UDOT,
+ UNSPEC_SME_FDOT))
FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR))
FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER))
FUNCTION (svldr_za, svldr_za_impl, )
+FUNCTION (svldr_zt, svldr_zt_impl, )
+FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2))
+FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4))
+FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA,
+ UNSPEC_SME_FMLA))
+FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA,
+ UNSPEC_SME_UMLA,
+ UNSPEC_SME_FMLA))
+FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS,
+ UNSPEC_SME_FMLS))
+FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS,
+ UNSPEC_SME_UMLS,
+ UNSPEC_SME_FMLS))
FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA,
UNSPEC_SME_FMOPA))
FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS,
UNSPEC_SME_FMOPS))
+FUNCTION (svread_za, svread_za_impl, )
FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR))
FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER))
FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR))
FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER))
FUNCTION (svstr_za, svstr_za_impl, )
+FUNCTION (svstr_zt, svstr_zt_impl, )
+FUNCTION (svsub_za, sme_1mode_function, (UNSPEC_SME_SUB, UNSPEC_SME_SUB,
+ UNSPEC_SME_FSUB))
+FUNCTION (svsub_write_za, sme_1mode_function, (UNSPEC_SME_SUB_WRITE,
+ UNSPEC_SME_SUB_WRITE, -1))
+FUNCTION (svsudot_za, svsudot_za_impl, )
+FUNCTION (svsudot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUDOT, -1, -1))
+FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUVDOT,
+ -1, -1))
FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1))
FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1))
FUNCTION (svundef_za, svundef_za_impl, )
+FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1))
+FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1))
+FUNCTION (svusvdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USVDOT,
+ -1))
FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1))
FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1))
+FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT,
+ UNSPEC_SME_UVDOT,
+ UNSPEC_SME_FVDOT))
+FUNCTION (svwrite_za, svwrite_za_impl,)
FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR))
FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER))
FUNCTION (svzero_mask_za, svzero_mask_za_impl, )
FUNCTION (svzero_za, svzero_za_impl, )
+FUNCTION (svzero_zt, svzero_zt_impl, )
} /* end namespace aarch64_sve */
DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m)
DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME2
+DEF_SVE_FUNCTION (svldr_zt, ldr_zt, none, none)
+DEF_SVE_FUNCTION (svstr_zt, str_zt, none, none)
+DEF_SVE_FUNCTION (svzero_zt, inherent_zt, none, none)
+#undef REQUIRED_EXTENSIONS
+
+/* The d_za entries in this section just declare C _za64 overloads,
+ which will then be resolved to either an integer function or a
+ floating-point function. They are needed because the integer and
+ floating-point functions have different architecture requirements. */
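+/* For example (an illustrative sketch; the ACLE spelling is informal):
+ the overloaded svadd_za64_vg1x2 resolves to the integer forms declared
+ under AARCH64_FL_SME_I16I64 below when given an svint64x2_t accumulator,
+ and to the _f64 form declared under AARCH64_FL_SME_F64F64 when given an
+ svfloat64x2_t one. */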
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 | AARCH64_FL_SM_ON
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_s_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, d_za, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_s_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION (svbmopa, binary_za_m, za_s_unsigned, za_m)
+DEF_SME_ZA_FUNCTION (svbmops, binary_za_m, za_s_unsigned, za_m)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_h_data,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_b_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_h_data,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_b_integer,
+ vg1x24, none)
+DEF_SVE_FUNCTION_GS (svluti2_lane_zt, luti2_lane_zt, bhs_data, x124, none)
+DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, bhs_data, x12, none)
+DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, hs_data, x4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_h_data,
+ vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_b_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_h_data,
+ vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_b_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_h_data,
+ vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_b_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_h_data,
+ vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_b_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_s_h_integer, za_m)
+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_s_h_integer, za_m)
+DEF_SME_ZA_FUNCTION_GS (svread, read_za_slice, za_bhsd_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svread_hor, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svread_ver, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_s_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, d_za, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_s_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsudot, binary_za_slice_uint_opt_single,
+ za_s_b_signed, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsudot_lane, dot_za_slice_uint_lane,
+ za_s_b_signed, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsuvdot_lane, dot_za_slice_uint_lane,
+ za_s_b_signed, vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svusdot, binary_za_slice_int_opt_single,
+ za_s_b_unsigned, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svusdot_lane, dot_za_slice_int_lane,
+ za_s_b_unsigned, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svusvdot_lane, dot_za_slice_int_lane,
+ za_s_b_unsigned, vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_h_data,
+ vg1x2, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_b_integer,
+ vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite, write_za_slice, za_bhsd_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite_hor, write_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite_ver, write_za, za_bhsd_data, vg24, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \
+ | AARCH64_FL_SME_I16I64 \
+ | AARCH64_FL_SM_ON)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_integer, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_d_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_d_h_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_d_h_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_h_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_h_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_h_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_h_integer,
+ vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_integer, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_d_integer,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_d_h_integer,
+ vg1x4, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \
+ | AARCH64_FL_SME_F64F64 \
+ | AARCH64_FL_SM_ON)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_float, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float,
+ vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none)
+#undef REQUIRED_EXTENSIONS
{
extern const function_base *const arm_has_sme;
extern const function_base *const arm_in_streaming_mode;
+ extern const function_base *const svadd_za;
+ extern const function_base *const svadd_write_za;
extern const function_base *const svaddha_za;
extern const function_base *const svaddva_za;
+ extern const function_base *const svbmopa_za;
+ extern const function_base *const svbmops_za;
extern const function_base *const svcntsb;
extern const function_base *const svcntsd;
extern const function_base *const svcntsh;
extern const function_base *const svcntsw;
+ extern const function_base *const svdot_za;
+ extern const function_base *const svdot_lane_za;
extern const function_base *const svld1_hor_za;
extern const function_base *const svld1_ver_za;
extern const function_base *const svldr_za;
+ extern const function_base *const svldr_zt;
+ extern const function_base *const svluti2_lane_zt;
+ extern const function_base *const svluti4_lane_zt;
+ extern const function_base *const svmla_za;
+ extern const function_base *const svmla_lane_za;
+ extern const function_base *const svmls_za;
+ extern const function_base *const svmls_lane_za;
extern const function_base *const svmopa_za;
extern const function_base *const svmops_za;
+ extern const function_base *const svread_za;
extern const function_base *const svread_hor_za;
extern const function_base *const svread_ver_za;
extern const function_base *const svst1_hor_za;
extern const function_base *const svst1_ver_za;
extern const function_base *const svstr_za;
+ extern const function_base *const svstr_zt;
+ extern const function_base *const svsub_za;
+ extern const function_base *const svsub_write_za;
+ extern const function_base *const svsudot_za;
+ extern const function_base *const svsudot_lane_za;
+ extern const function_base *const svsuvdot_lane_za;
extern const function_base *const svsumopa_za;
extern const function_base *const svsumops_za;
+ extern const function_base *const svusdot_za;
+ extern const function_base *const svusdot_lane_za;
+ extern const function_base *const svusvdot_lane_za;
extern const function_base *const svusmopa_za;
extern const function_base *const svusmops_za;
+ extern const function_base *const svwrite_za;
extern const function_base *const svwrite_hor_za;
extern const function_base *const svwrite_ver_za;
extern const function_base *const svundef_za;
- extern const function_base *const svzero_za;
+ extern const function_base *const svvdot_lane_za;
extern const function_base *const svzero_mask_za;
+ extern const function_base *const svzero_za;
+ extern const function_base *const svzero_zt;
}
}
}
};
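+/* Expands the svclamp forms. As an illustrative sketch (ACLE spellings
+ informal): a signed integer call such as svclamp_s32 maps to the
+ SMAX-based clamp pattern, an unsigned call to the UMAX-based one and a
+ float call to fclamp; multi-vector calls such as svclamp_single_s32_x2
+ map to the corresponding *_single patterns instead. */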
+class svclamp_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto mode = e.tuple_mode (0);
+ insn_code icode;
+ if (e.type_suffix (0).float_p)
+ icode = (e.vectors_per_tuple () > 1
+ ? code_for_aarch64_sve_fclamp_single (mode)
+ : code_for_aarch64_sve_fclamp (mode));
+ else
+ {
+ auto max = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
+ icode = (e.vectors_per_tuple () > 1
+ ? code_for_aarch64_sve_clamp_single (max, mode)
+ : code_for_aarch64_sve_clamp (max, mode));
+ }
+ return e.use_exact_insn (icode);
+ }
+};
+
+class svcvtn_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ return e.use_exact_insn (code_for_aarch64_sve_cvtn (e.result_mode ()));
+ }
+};
+
class svldnt1_gather_impl : public full_width_access
{
public:
}
};
+class svpext_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ unsigned int bits = e.type_suffix (0).element_bits;
+ return e.use_exact_insn (e.vectors_per_tuple () == 2
+ ? code_for_aarch64_sve_pextx2 (bits)
+ : code_for_aarch64_sve_pext (bits));
+ }
+};
+
+class svpsel_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ unsigned int bits = e.type_suffix (0).element_bits;
+ return e.use_exact_insn (code_for_aarch64_sve_psel (bits));
+ }
+};
+
class svqcadd_impl : public function_base
{
public:
/* The saturation has no effect, and [SU]RSHL has immediate forms
that we can use for sensible shift amounts. */
function_instance instance ("svrshl", functions::svrshl,
- shapes::binary_int_opt_n, MODE_n,
- f.type_suffix_ids, GROUP_none, f.pred);
+ shapes::binary_int_opt_single_n,
+ MODE_n, f.type_suffix_ids, GROUP_none,
+ f.pred);
return f.redirect_call (instance);
}
}
gimple *
fold (gimple_folder &f) const override
{
+ if (f.vectors_per_tuple () > 1)
+ return nullptr;
+
if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2)))
{
if (wi::to_widest (amount) >= 0)
machine_mode mode = e.vector_mode (0);
if (e.pred == PRED_x
&& aarch64_sve_sqadd_sqsub_immediate_p (mode, e.args[2], false))
- return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1);
+ return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1, -1);
return e.map_to_unspecs (-1, UNSPEC_USQADD, -1);
}
};
}
};
+class svunpk_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ optab op = (e.type_suffix (0).unsigned_p ? zext_optab : sext_optab);
+ insn_code icode = convert_optab_handler (op, e.result_mode (),
+ GET_MODE (e.args[0]));
+ return e.use_exact_insn (icode);
+ }
+};
+
class svuqadd_impl : public function_base
{
public:
FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),)
FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1))
FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1))
+FUNCTION (svbfmlslb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlslbvnx4sf))
+FUNCTION (svbfmlslb_lane, fixed_insn_function,
+ (CODE_FOR_aarch64_sve_bfmlslb_lanevnx4sf))
+FUNCTION (svbfmlslt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlsltvnx4sf))
+FUNCTION (svbfmlslt_lane, fixed_insn_function,
+ (CODE_FOR_aarch64_sve_bfmlslt_lanevnx4sf))
FUNCTION (svbgrp, unspec_based_function, (UNSPEC_BGRP, UNSPEC_BGRP, -1))
FUNCTION (svbsl, CODE_FOR_MODE0 (aarch64_sve2_bsl),)
FUNCTION (svbsl1n, CODE_FOR_MODE0 (aarch64_sve2_bsl1n),)
FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
FUNCTION (svcdot, svcdot_impl,)
FUNCTION (svcdot_lane, svcdot_lane_impl,)
+FUNCTION (svclamp, svclamp_impl,)
FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
+FUNCTION (svcvtn, svcvtn_impl,)
FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
UNSPEC_UMULLT, -1))
FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),)
FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH))
+FUNCTION (svpext, svpext_impl,)
FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),)
FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1))
FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1))
FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1))
FUNCTION (svpmullt_pair, unspec_based_function, (-1, UNSPEC_PMULLT_PAIR, -1))
+FUNCTION (svpsel, svpsel_impl,)
FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN))
FUNCTION (svqcadd, svqcadd_impl,)
+FUNCTION (svqcvt, integer_conversion, (UNSPEC_SQCVT, UNSPEC_SQCVTU,
+ UNSPEC_UQCVT, -1))
+FUNCTION (svqcvtn, integer_conversion, (UNSPEC_SQCVTN, UNSPEC_SQCVTUN,
+ UNSPEC_UQCVTN, -1))
FUNCTION (svqdmlalb, unspec_based_qadd_function, (UNSPEC_SQDMULLB, -1, -1))
FUNCTION (svqdmlalb_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLB,
-1, -1))
FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH,
-1, -1))
FUNCTION (svqrshl, svqrshl_impl,)
+FUNCTION (svqrshr, unspec_based_uncond_function, (UNSPEC_SQRSHR,
+ UNSPEC_UQRSHR, -1, 1))
+FUNCTION (svqrshrn, unspec_based_uncond_function, (UNSPEC_SQRSHRN,
+ UNSPEC_UQRSHRN, -1, 1))
FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB,
UNSPEC_UQRSHRNB, -1))
FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT,
UNSPEC_UQRSHRNT, -1))
+FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, 1))
+FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, 1))
FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1))
FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1))
FUNCTION (svqshl, svqshl_impl,)
FUNCTION (svraddhnt, unspec_based_function, (UNSPEC_RADDHNT,
UNSPEC_RADDHNT, -1))
FUNCTION (svrax1, fixed_insn_function, (CODE_FOR_aarch64_sve2_rax1))
+FUNCTION (svrevd, unspec_based_function, (UNSPEC_REVD, UNSPEC_REVD,
+ UNSPEC_REVD))
FUNCTION (svrhadd, unspec_based_function, (UNSPEC_SRHADD, UNSPEC_URHADD, -1))
FUNCTION (svrshl, svrshl_impl,)
FUNCTION (svrshr, unspec_based_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1))
FUNCTION (svsubwt, unspec_based_function, (UNSPEC_SSUBWT, UNSPEC_USUBWT, -1))
FUNCTION (svtbl2, svtbl2_impl,)
FUNCTION (svtbx, CODE_FOR_MODE0 (aarch64_sve2_tbx),)
+FUNCTION (svunpk, svunpk_impl,)
FUNCTION (svuqadd, svuqadd_impl,)
+FUNCTION (svuzp, multireg_permute, (UNSPEC_UZP))
+FUNCTION (svuzpq, multireg_permute, (UNSPEC_UZPQ))
+FUNCTION (svzip, multireg_permute, (UNSPEC_ZIP))
+FUNCTION (svzipq, multireg_permute, (UNSPEC_ZIPQ))
FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
DEF_SVE_FUNCTION (svqdmlslbt, ternary_long_opt_n, hsd_signed, none)
DEF_SVE_FUNCTION (svqdmlslt, ternary_long_opt_n, hsd_signed, none)
DEF_SVE_FUNCTION (svqdmlslt_lane, ternary_long_lane, sd_signed, none)
-DEF_SVE_FUNCTION (svqdmulh, binary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqdmulh, binary_opt_single_n, all_signed, none)
DEF_SVE_FUNCTION (svqdmulh_lane, binary_lane, hsd_signed, none)
DEF_SVE_FUNCTION (svqdmullb, binary_long_opt_n, hsd_signed, none)
DEF_SVE_FUNCTION (svqdmullb_lane, binary_long_lane, sd_signed, none)
DEF_SVE_FUNCTION (svrecpe, unary, s_unsigned, mxz)
DEF_SVE_FUNCTION (svrhadd, binary_opt_n, all_integer, mxz)
DEF_SVE_FUNCTION (svrsqrte, unary, s_unsigned, mxz)
-DEF_SVE_FUNCTION (svrshl, binary_int_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svrshl, binary_int_opt_single_n, all_integer, mxz)
DEF_SVE_FUNCTION (svrshr, shift_right_imm, all_integer, mxz)
DEF_SVE_FUNCTION (svrshrnb, shift_right_imm_narrowb, hsd_integer, none)
DEF_SVE_FUNCTION (svrshrnt, shift_right_imm_narrowt, hsd_integer, none)
DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SME \
+ | AARCH64_FL_SM_ON)
+DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
+DEF_SVE_FUNCTION (svpsel, select_pred, all_pred_count, none)
+DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SME2 \
+ | AARCH64_FL_SM_ON)
+DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslt, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslt_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svclamp, clamp, all_float, none)
+DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
+DEF_SVE_FUNCTION (svcntp, count_pred_c, all_count, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
+DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, s_narrow_fsu, none)
+DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, s_narrow_fsu, none)
+DEF_SVE_FUNCTION_GS (svld1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svldnt1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none)
+DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, all_float, x24, none)
+DEF_SVE_FUNCTION_GS (svpext, extract_pred, all_count, x12, none)
+DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
+DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqdmulh, binary_opt_single_n, all_signed, x24, none)
+DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svrinta, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintm, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintn, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintp, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrshl, binary_int_opt_single_n, all_integer, x24, none)
+DEF_SVE_FUNCTION_GS (svsel, binaryxn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svunpk, unary_convertxn, bhs_widen, x24, none)
+DEF_SVE_FUNCTION_GS (svuzp, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svuzpq, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none)
+#undef REQUIRED_EXTENSIONS
extern const function_base *const svbcax;
extern const function_base *const svbdep;
extern const function_base *const svbext;
+ extern const function_base *const svbfmlslb;
+ extern const function_base *const svbfmlslb_lane;
+ extern const function_base *const svbfmlslt;
+ extern const function_base *const svbfmlslt_lane;
extern const function_base *const svbgrp;
extern const function_base *const svbsl;
extern const function_base *const svbsl1n;
extern const function_base *const svbsl2n;
extern const function_base *const svcdot;
extern const function_base *const svcdot_lane;
+ extern const function_base *const svclamp;
+ extern const function_base *const svcntp;
extern const function_base *const svcvtlt;
+ extern const function_base *const svcvtn;
extern const function_base *const svcvtx;
extern const function_base *const svcvtxnt;
extern const function_base *const sveor3;
extern const function_base *const svmullt_lane;
extern const function_base *const svnbsl;
extern const function_base *const svnmatch;
+ extern const function_base *const svpext;
extern const function_base *const svpmul;
extern const function_base *const svpmullb;
extern const function_base *const svpmullb_pair;
extern const function_base *const svpmullt;
extern const function_base *const svpmullt_pair;
+ extern const function_base *const svpsel;
extern const function_base *const svqabs;
extern const function_base *const svqcadd;
+ extern const function_base *const svqcvt;
+ extern const function_base *const svqcvtn;
extern const function_base *const svqdmlalb;
extern const function_base *const svqdmlalb_lane;
extern const function_base *const svqdmlalbt;
extern const function_base *const svqrdmlsh;
extern const function_base *const svqrdmlsh_lane;
extern const function_base *const svqrshl;
+ extern const function_base *const svqrshr;
+ extern const function_base *const svqrshrn;
extern const function_base *const svqrshrnb;
extern const function_base *const svqrshrnt;
+ extern const function_base *const svqrshru;
+ extern const function_base *const svqrshrun;
extern const function_base *const svqrshrunb;
extern const function_base *const svqrshrunt;
extern const function_base *const svqshl;
extern const function_base *const svraddhnb;
extern const function_base *const svraddhnt;
extern const function_base *const svrax1;
+ extern const function_base *const svrevd;
extern const function_base *const svrhadd;
extern const function_base *const svrshl;
extern const function_base *const svrshr;
extern const function_base *const svsubwt;
extern const function_base *const svtbl2;
extern const function_base *const svtbx;
+ extern const function_base *const svunpk;
extern const function_base *const svuqadd;
+ extern const function_base *const svuzp;
+ extern const function_base *const svuzpq;
+ extern const function_base *const svzip;
+ extern const function_base *const svzipq;
extern const function_base *const svwhilege;
extern const function_base *const svwhilegt;
extern const function_base *const svwhilerw;
#define TYPES_all_pred(S, D) \
S (b8), S (b16), S (b32), S (b64)
+/* _c8 _c16 _c32 _c64. */
+#define TYPES_all_count(S, D) \
+ S (c8), S (c16), S (c32), S (c64)
+
+/* _b8 _b16 _b32 _b64
+ _c8 _c16 _c32 _c64. */
+#define TYPES_all_pred_count(S, D) \
+ TYPES_all_pred (S, D), \
+ TYPES_all_count (S, D)
+
/* _f16 _f32 _f64. */
#define TYPES_all_float(S, D) \
S (f16), S (f32), S (f64)
#define TYPES_b(S, D) \
S (b)
+/* _c only. */
+#define TYPES_c(S, D) \
+ S (c)
+
/* _u8. */
#define TYPES_b_unsigned(S, D) \
S (u8)
#define TYPES_bhs_integer(S, D) \
TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D)
+/* _bf16
+ _f16 _f32
+ _s8 _s16 _s32
+ _u8 _u16 _u32. */
+#define TYPES_bhs_data(S, D) \
+ S (bf16), S (f16), S (f32), TYPES_bhs_integer (S, D)
+
+/* _s16_s8 _s32_s16 _s64_s32
+ _u16_u8 _u32_u16 _u64_u32. */
+#define TYPES_bhs_widen(S, D) \
+ D (s16, s8), D (s32, s16), D (s64, s32), \
+ D (u16, u8), D (u32, u16), D (u64, u32)
+
/* _s16
_u16. */
#define TYPES_h_integer(S, D) \
#define TYPES_hs_float(S, D) \
S (f16), S (f32)
+/* _bf16
+ _f16 _f32
+ _s16 _s32
+ _u16 _u32. */
+#define TYPES_hs_data(S, D) \
+ S (bf16), S (f16), S (f32), TYPES_hs_integer (S, D)
+
/* _u16 _u64. */
#define TYPES_hd_unsigned(S, D) \
S (u16), S (u64)
#define TYPES_cvt_bfloat(S, D) \
D (bf16, f32)
+/* { _bf16 _f16 } x _f32. */
+#define TYPES_cvt_h_s_float(S, D) \
+ D (bf16, f32), D (f16, f32)
+
/* _f32_f16
_f64_f32. */
#define TYPES_cvt_long(S, D) \
#define TYPES_cvt_narrow(S, D) \
D (f16, f32), TYPES_cvt_narrow_s (S, D)
+/* { _s32 _u32 } x _f32
+
+ _f32 x { _s32 _u32 }. */
+#define TYPES_cvt_s_s(S, D) \
+ D (s32, f32), \
+ D (u32, f32), \
+ D (f32, s32), \
+ D (f32, u32)
+
/* { _s32 _s64 } x { _b8 _b16 _b32 _b64 }
{ _u32 _u64 }. */
#define TYPES_inc_dec_n1(D, A) \
TYPES_inc_dec_n1 (D, u32), \
TYPES_inc_dec_n1 (D, u64)
+/* { _s16 _u16 } x _s32
+
+ { _u16 } x _u32. */
+#define TYPES_qcvt_x2(S, D) \
+ D (s16, s32), \
+ D (u16, u32), \
+ D (u16, s32)
+
+/* { _s8 _u8 } x _s32
+
+ { _u8 } x _u32
+
+ { _s16 _u16 } x _s64
+
+ { _u16 } x _u64. */
+#define TYPES_qcvt_x4(S, D) \
+ D (s8, s32), \
+ D (u8, u32), \
+ D (u8, s32), \
+ D (s16, s64), \
+ D (u16, u64), \
+ D (u16, s64)
+
+/* _s16_s32
+ _u16_u32. */
+#define TYPES_qrshr_x2(S, D) \
+ D (s16, s32), \
+ D (u16, u32)
+
+/* _u16_s32. */
+#define TYPES_qrshru_x2(S, D) \
+ D (u16, s32)
+
+/* _s8_s32
+ _s16_s64
+ _u8_u32
+ _u16_u64. */
+#define TYPES_qrshr_x4(S, D) \
+ D (s8, s32), \
+ D (s16, s64), \
+ D (u8, u32), \
+ D (u16, u64)
+
+/* _u8_s32
+ _u16_s64. */
+#define TYPES_qrshru_x4(S, D) \
+ D (u8, s32), \
+ D (u16, s64)
+
/* { _bf16 } { _bf16 }
{ _f16 _f32 _f64 } { _f16 _f32 _f64 }
{ _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
TYPES_while1 (D, b32), \
TYPES_while1 (D, b64)
+/* { _b8 _b16 _b32 _b64 } x { _s64 _u64 }. */
+#define TYPES_while_x(S, D) \
+ D (b8, s64), D (b8, u64), \
+ D (b16, s64), D (b16, u64), \
+ D (b32, s64), D (b32, u64), \
+ D (b64, s64), D (b64, u64)
+
+/* { _c8 _c16 _c32 _c64 } x { _s64 _u64 }. */
+#define TYPES_while_x_c(S, D) \
+ D (c8, s64), D (c8, u64), \
+ D (c16, s64), D (c16, u64), \
+ D (c32, s64), D (c32, u64), \
+ D (c64, s64), D (c64, u64)
+
+/* _f32_f16
+ _s32_s16
+ _u32_u16. */
+#define TYPES_s_narrow_fsu(S, D) \
+ D (f32, f16), D (s32, s16), D (u32, u16)
+
/* _za8 _za16 _za32 _za64 _za128. */
#define TYPES_all_za(S, D) \
S (za8), S (za16), S (za32), S (za64), S (za128)
TYPES_za_bhsd_data (S, D), \
TYPES_reinterpret1 (D, za128)
+/* _za32_s8. */
+#define TYPES_za_s_b_signed(S, D) \
+ D (za32, s8)
+
+/* _za32_u8. */
+#define TYPES_za_s_b_unsigned(S, D) \
+ D (za32, u8)
+
+/* _za32 x { _s8 _u8 }. */
+#define TYPES_za_s_b_integer(S, D) \
+ D (za32, s8), D (za32, u8)
+
+/* _za32 x { _s16 _u16 }. */
+#define TYPES_za_s_h_integer(S, D) \
+ D (za32, s16), D (za32, u16)
+
+/* _za32 x { _bf16 _f16 _s16 _u16 }. */
+#define TYPES_za_s_h_data(S, D) \
+ D (za32, bf16), D (za32, f16), D (za32, s16), D (za32, u16)
+
+/* _za32_u32. */
+#define TYPES_za_s_unsigned(S, D) \
+ D (za32, u32)
+
/* _za32 x { _s32 _u32 }. */
#define TYPES_za_s_integer(S, D) \
D (za32, s32), D (za32, u32)
+/* _za32_f32. */
+#define TYPES_za_s_float(S, D) \
+ D (za32, f32)
+
+/* _za32 x { _f32 _s32 _u32 }. */
+#define TYPES_za_s_data(S, D) \
+ D (za32, f32), D (za32, s32), D (za32, u32)
+
+/* _za64 x { _s16 _u16 }. */
+#define TYPES_za_d_h_integer(S, D) \
+ D (za64, s16), D (za64, u16)
/* _za64_f64. */
#define TYPES_za_d_float(S, D) \
/* Create an array for each TYPES_<combination> macro above. */
DEF_SVE_TYPES_ARRAY (all_pred);
+DEF_SVE_TYPES_ARRAY (all_count);
+DEF_SVE_TYPES_ARRAY (all_pred_count);
DEF_SVE_TYPES_ARRAY (all_float);
DEF_SVE_TYPES_ARRAY (all_signed);
DEF_SVE_TYPES_ARRAY (all_float_and_signed);
DEF_SVE_TYPES_ARRAY (bhs_signed);
DEF_SVE_TYPES_ARRAY (bhs_unsigned);
DEF_SVE_TYPES_ARRAY (bhs_integer);
+DEF_SVE_TYPES_ARRAY (bhs_data);
+DEF_SVE_TYPES_ARRAY (bhs_widen);
+DEF_SVE_TYPES_ARRAY (c);
DEF_SVE_TYPES_ARRAY (h_integer);
DEF_SVE_TYPES_ARRAY (hs_signed);
DEF_SVE_TYPES_ARRAY (hs_integer);
DEF_SVE_TYPES_ARRAY (hs_float);
+DEF_SVE_TYPES_ARRAY (hs_data);
DEF_SVE_TYPES_ARRAY (hd_unsigned);
DEF_SVE_TYPES_ARRAY (hsd_signed);
DEF_SVE_TYPES_ARRAY (hsd_integer);
DEF_SVE_TYPES_ARRAY (d_data);
DEF_SVE_TYPES_ARRAY (cvt);
DEF_SVE_TYPES_ARRAY (cvt_bfloat);
+DEF_SVE_TYPES_ARRAY (cvt_h_s_float);
DEF_SVE_TYPES_ARRAY (cvt_long);
DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
DEF_SVE_TYPES_ARRAY (cvt_narrow);
+DEF_SVE_TYPES_ARRAY (cvt_s_s);
DEF_SVE_TYPES_ARRAY (inc_dec_n);
+DEF_SVE_TYPES_ARRAY (qcvt_x2);
+DEF_SVE_TYPES_ARRAY (qcvt_x4);
+DEF_SVE_TYPES_ARRAY (qrshr_x2);
+DEF_SVE_TYPES_ARRAY (qrshr_x4);
+DEF_SVE_TYPES_ARRAY (qrshru_x2);
+DEF_SVE_TYPES_ARRAY (qrshru_x4);
DEF_SVE_TYPES_ARRAY (reinterpret);
DEF_SVE_TYPES_ARRAY (reinterpret_b);
DEF_SVE_TYPES_ARRAY (while);
+DEF_SVE_TYPES_ARRAY (while_x);
+DEF_SVE_TYPES_ARRAY (while_x_c);
+DEF_SVE_TYPES_ARRAY (s_narrow_fsu);
DEF_SVE_TYPES_ARRAY (all_za);
DEF_SVE_TYPES_ARRAY (d_za);
+DEF_SVE_TYPES_ARRAY (za_bhsd_data);
DEF_SVE_TYPES_ARRAY (za_all_data);
+DEF_SVE_TYPES_ARRAY (za_s_b_signed);
+DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);
+DEF_SVE_TYPES_ARRAY (za_s_b_integer);
+DEF_SVE_TYPES_ARRAY (za_s_h_integer);
+DEF_SVE_TYPES_ARRAY (za_s_h_data);
+DEF_SVE_TYPES_ARRAY (za_s_unsigned);
DEF_SVE_TYPES_ARRAY (za_s_integer);
+DEF_SVE_TYPES_ARRAY (za_s_float);
+DEF_SVE_TYPES_ARRAY (za_s_data);
+DEF_SVE_TYPES_ARRAY (za_d_h_integer);
DEF_SVE_TYPES_ARRAY (za_d_float);
DEF_SVE_TYPES_ARRAY (za_d_integer);
DEF_SVE_TYPES_ARRAY (mop_base);
GROUP_none, NUM_GROUP_SUFFIXES
};
+static const group_suffix_index groups_x2[] = { GROUP_x2, NUM_GROUP_SUFFIXES };
+
+static const group_suffix_index groups_x12[] = {
+ GROUP_none, GROUP_x2, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_x4[] = { GROUP_x4, NUM_GROUP_SUFFIXES };
+
+static const group_suffix_index groups_x24[] = {
+ GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_x124[] = {
+ GROUP_none, GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES
+};
+
static const group_suffix_index groups_x1234[] = {
GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES
};
+static const group_suffix_index groups_vg1x2[] = {
+ GROUP_vg1x2, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg1x4[] = {
+ GROUP_vg1x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg1x24[] = {
+ GROUP_vg1x2, GROUP_vg1x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg2[] = {
+ GROUP_vg2x1, GROUP_vg2x2, GROUP_vg2x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg4[] = {
+ GROUP_vg4x1, GROUP_vg4x2, GROUP_vg4x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg24[] = {
+ GROUP_vg2, GROUP_vg4, NUM_GROUP_SUFFIXES
+};
+
/* Used by functions that have no governing predicate. */
static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
return true;
/* Handle direct reads of global state. */
- return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA);
+ return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA | CP_READ_ZT0);
}
/* Return true if calls to the function could modify some form of
return true;
/* Handle direct modifications of global state. */
- return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA);
+ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA | CP_WRITE_ZT0);
}
/* Return true if calls to the function could raise a signal. */
};
static state_flag_info state_flags[] =
{
- { "za", CP_READ_ZA, CP_WRITE_ZA }
+ { "za", CP_READ_ZA, CP_WRITE_ZA },
+ { "zt0", CP_READ_ZT0, CP_WRITE_ZT0 }
};
tree args = NULL_TREE;
auto add_group_suffix = [&](group_suffix_index group_suffix_id,
unsigned int pi)
{
+ if (mode == MODE_single
+ && group_suffixes[group_suffix_id].vectors_per_tuple == 1)
+ return;
+
if (!explicit_type0 && !explicit_type1)
/* Deal with the common case in which there is one overloaded
function for all type combinations. */
argno + 1, fndecl, num_vectors);
}
+/* Report that arguments FIRST_ARGNO and ARGNO have different numbers
+ of vectors, but are required to have the same number of vectors.
+ FIRST_TYPE and TYPE are the types that arguments FIRST_ARGNO and
+ ARGNO actually have. */
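+/* For example (illustrative): passing an svint32x4_t after an
+ svint32x2_t to arguments that must have matching tuple sizes reports
+ the "mismatched tuple types" message, whereas mixing a single vector
+ with a tuple reports one of the more specific messages below. */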
+void
+function_resolver::report_mismatched_num_vectors (unsigned int first_argno,
+ sve_type first_type,
+ unsigned int argno,
+ sve_type type)
+{
+ /* If the tuple size is implied by the group suffix, and if the first
+ type had the right number of vectors, treat argument ARGNO as being
+ individually wrong, rather than wrong in relation to FIRST_ARGNO. */
+ if (group_suffix_id != GROUP_none
+ && first_type.num_vectors == vectors_per_tuple ())
+ {
+ report_incorrect_num_vectors (argno, type, first_type.num_vectors);
+ return;
+ }
+
+ /* Make sure that FIRST_TYPE itself is sensible before using it
+ as a basis for an error message. */
+ if (resolve_to (mode_suffix_id, first_type) == error_mark_node)
+ return;
+
+ if (type.num_vectors != 1 && first_type.num_vectors == 1)
+ error_at (location, "passing tuple %qT to argument %d of %qE after"
+ " passing single vector %qT to argument %d",
+ get_vector_type (type), argno + 1, fndecl,
+ get_vector_type (first_type), first_argno + 1);
+ else if (type.num_vectors == 1 && first_type.num_vectors != 1)
+ error_at (location, "passing single vector %qT to argument %d"
+ " of %qE after passing tuple %qT to argument %d",
+ get_vector_type (type), argno + 1, fndecl,
+ get_vector_type (first_type), first_argno + 1);
+ else
+ error_at (location, "passing mismatched tuple types %qT and %qT"
+ " to arguments %d and %d of %qE",
+ get_vector_type (first_type), get_vector_type (type),
+ first_argno + 1, argno + 1, fndecl);
+}
+
/* Report that the function has no form that takes type TYPE.
Return error_mark_node. */
tree
return report_no_such_form (type0);
if (type0 == type_suffix_ids[0])
return report_no_such_form (type1);
- /* To be filled in when we have other cases. */
- gcc_unreachable ();
+ error_at (location, "%qE has no form that takes %qT and %qT arguments",
+ fndecl, get_vector_type (type0), get_vector_type (type1));
+ return error_mark_node;
}
return res;
}
return report_no_such_form (type);
}
+/* Like resolve_to, but used for a conversion function with the following
+ properties:
+
+ - The function has an explicit first type suffix.
+ - The elements of the argument (which has type TYPE) might be narrower
+ or wider than the elements of the return type.
+ - The return type has enough vectors to represent the converted value
+ of every element.
+ - The group suffix describes the wider of the argument type and the
+ return type. */
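+/* Worked example (illustrative; the ACLE spelling is informal): for an
+ svqcvt_s16 call whose argument is an svint32x2_t, TYPE has two vectors
+ of 32-bit elements and the return suffix is _s16, so the converted
+ value needs 2 * 16 / 32 == 1 return vector and the group suffix (_x2)
+ describes the wider argument side. */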
+tree
+function_resolver::resolve_conversion (mode_suffix_index mode, sve_type type)
+{
+ auto ret_type = type_suffix_ids[0];
+ unsigned int num_ret_vectors = (type.num_vectors
+ * type_suffixes[ret_type].element_bits
+ / type_suffixes[type.type].element_bits);
+ if (num_ret_vectors == 1
+ || num_ret_vectors == 2
+ || num_ret_vectors == 4)
+ {
+ unsigned int num_vectors = MAX (num_ret_vectors, type.num_vectors);
+ if (tree res = lookup_form (mode, { type.type, num_vectors }))
+ return res;
+ }
+ return report_no_such_form (type);
+}
+
+/* Require argument ARGNO to be an svbool_t or svcount_t predicate.
+ Return its type on success, otherwise report an error and return
+ NUM_VECTOR_TYPES. */
+vector_type_index
+function_resolver::infer_predicate_type (unsigned int argno)
+{
+ tree actual = get_argument_type (argno);
+ if (actual == error_mark_node)
+ return NUM_VECTOR_TYPES;
+
+ for (auto index : { VECTOR_TYPE_svbool_t, VECTOR_TYPE_svcount_t })
+ if (matches_type_p (acle_vector_types[0][index], actual))
+ return index;
+
+ error_at (location, "passing %qT to argument %d of %qE, which expects"
+ " an %qs or %qs", actual, argno + 1, fndecl, "svbool_t",
+ "svcount_t");
+ return NUM_VECTOR_TYPES;
+}
+
/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
Return the associated type suffix on success, otherwise report an
error and return NUM_TYPE_SUFFIXES. */
return NUM_TYPE_SUFFIXES;
}
+/* Require arguments ARGNO and ARGNO + 1 to be 64-bit scalar integers
+ of the same signedness, or to be a combination that converts unambiguously
+ to such a pair. Return the associated type suffix on success,
+ otherwise report an error and return NUM_TYPE_SUFFIXES. */
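+/* For example (an illustrative call; the ACLE spelling is informal):
+ in svwhilelt_c32 (start, end, 2), an int32_t START paired with an
+ int64_t END converts unambiguously to a signed 64-bit pair and selects
+ the _s64 form, whereas pairing an int64_t with a uint64_t is rejected
+ as mismatched. */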
+type_suffix_index
+function_resolver::infer_64bit_scalar_integer_pair (unsigned int argno)
+{
+ /* Require two scalar integers, one of which is 64 bits wide and the
+ other no wider. */
+ tree types[] = { get_argument_type (argno), get_argument_type (argno + 1) };
+ if (!INTEGRAL_TYPE_P (types[0])
+ || !INTEGRAL_TYPE_P (types[1])
+ || MAX (TYPE_PRECISION (types[0]), TYPE_PRECISION (types[1])) != 64)
+ {
+ error_at (location, "passing %qT and %qT to arguments %d and %d of %qE,"
+ " which expects a pair of 64-bit integers", types[0], types[1],
+ argno + 1, argno + 2, fndecl);
+ return NUM_TYPE_SUFFIXES;
+ }
+
+ /* Allow signed integers smaller than int64_t to be paired with an int64_t.
+ Allow unsigned integers smaller than uint64_t to be paired with any
+ 64-bit integer. */
+ for (int i = 0; i < 2; ++i)
+ {
+ if (TYPE_PRECISION (types[i]) != 64)
+ continue;
+
+ if (TYPE_UNSIGNED (types[1 - i]) != TYPE_UNSIGNED (types[i]))
+ {
+ if (TYPE_PRECISION (types[1 - i]) == 64)
+ continue;
+ if (!TYPE_UNSIGNED (types[1 - i]))
+ continue;
+ }
+ return TYPE_UNSIGNED (types[i]) ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64;
+ }
+
+ error_at (location, "passing mismatched integer types %qT and %qT"
+ " to arguments %d and %d of %qE", types[0], types[1],
+ argno + 1, argno + 2, fndecl);
+ return NUM_TYPE_SUFFIXES;
+}
+
/* Require argument ARGNO to be a pointer to a scalar type that has a
corresponding type suffix. Return that type suffix on success,
otherwise report an error and return NUM_TYPE_SUFFIXES.
return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
}
+/* PRED_TYPE is the type of a governing predicate argument and DATA_TYPE
+ is the type of an argument that it predicates. Require the two types
+ to "agree": svcount_t must be used for multiple vectors and svbool_t
+ for single vectors.
+
+ Return true if they do agree, otherwise report an error and
+ return false. */
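+/* For example (illustrative; ACLE spellings informal): a multi-vector
+ select such as svsel_s32_x2 must be governed by an svcount_t, while
+ the single-vector svsel_s32 must be governed by an svbool_t; other
+ combinations trigger the errors below. */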
+bool function_resolver::
+require_matching_predicate_type (vector_type_index pred_type,
+ sve_type data_type)
+{
+ if (pred_type == VECTOR_TYPE_svbool_t && data_type.num_vectors == 1)
+ return true;
+
+ if (pred_type == VECTOR_TYPE_svcount_t && data_type.num_vectors != 1)
+ return true;
+
+ /* Make sure that DATA_TYPE itself is sensible before using it
+ as a basis for an error message. */
+ if (resolve_to (mode_suffix_id, data_type) == error_mark_node)
+ return false;
+
+ if (data_type.num_vectors > 1)
+ error_at (location, "operations on multiple vectors must be predicated"
+ " by %qs rather than %qs", "svcount_t", "svbool_t");
+ else
+ error_at (location, "operations on single vectors must be predicated"
+ " by %qs rather than %qs", "svbool_t", "svcount_t");
+ return false;
+}
+
/* Require argument ARGNO to be a vector or scalar argument. Return true
if it is, otherwise report an appropriate error. */
bool
if (!new_type)
return false;
+ if (type.num_vectors != new_type.num_vectors)
+ {
+ report_mismatched_num_vectors (first_argno, type, argno, new_type);
+ return false;
+ }
+
if (type != new_type)
{
error_at (location, "passing %qT to argument %d of %qE, but"
return true;
}
-/* Require argument ARGNO to be a vector type with the following properties:
+/* Require argument ARGNO to be a vector or tuple type with the following
+ properties:
- the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS
is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
- a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE
- EXPECTED_BITS itself otherwise
+ - the number of vectors must be the same as FIRST_TYPE's if
+ EXPECTED_NUM_VECTORS is zero, otherwise it must be EXPECTED_NUM_VECTORS.
+
Return true if the argument has the required type, otherwise report
an appropriate error.
unsigned int first_argno,
sve_type first_type,
type_class_index expected_tclass,
- unsigned int expected_bits)
+ unsigned int expected_bits,
+ unsigned int expected_num_vectors)
{
/* If the type needs to match FIRST_ARGNO exactly, use the preferred
error message for that case. */
- if (first_type.num_vectors == 1
- && expected_tclass == SAME_TYPE_CLASS
- && expected_bits == SAME_SIZE)
+ if (expected_tclass == SAME_TYPE_CLASS
+ && expected_bits == SAME_SIZE
+ && expected_num_vectors == 0)
{
/* There's no need to resolve this case out of order. */
gcc_assert (argno > first_argno);
else if (expected_bits == QUARTER_SIZE)
expected_bits = first_type_suffix.element_bits / 4;
+ unsigned int orig_expected_num_vectors = expected_num_vectors;
+ if (expected_num_vectors == 0)
+ expected_num_vectors = first_type.num_vectors;
+
/* If the expected type doesn't depend on FIRST_TYPE at all,
just check for the fixed choice of vector type. */
if (expected_tclass == orig_expected_tclass
- && expected_bits == orig_expected_bits)
+ && expected_bits == orig_expected_bits
+ && orig_expected_num_vectors == 1)
{
const type_suffix_info &expected_suffix
= type_suffixes[find_type_suffix (expected_tclass, expected_bits)];
/* Require the argument to be some form of SVE vector type,
without being specific about the type of vector we want. */
- sve_type actual_type = infer_vector_type (argno);
+ sve_type actual_type = infer_sve_type (argno);
if (!actual_type)
return false;
+ if (actual_type.num_vectors != expected_num_vectors)
+ {
+ if (orig_expected_num_vectors == 0)
+ report_mismatched_num_vectors (first_argno, first_type,
+ argno, actual_type);
+ else
+ report_incorrect_num_vectors (argno, actual_type,
+ expected_num_vectors);
+ return false;
+ }
+
if (orig_expected_tclass == SAME_TYPE_CLASS
&& orig_expected_bits == SAME_SIZE)
{
if (actual_type.type == first_type.type)
return true;
- error_at (location, "passing %qT to argument %d of %qE, but"
- " argument %d was a tuple of %qT",
- get_vector_type (actual_type), argno + 1, fndecl,
- first_argno + 1, get_vector_type (first_type.type));
+ if (first_type.num_vectors > 1)
+ error_at (location, "passing %qT to argument %d of %qE, but"
+ " argument %d was a tuple of %qT",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ first_argno + 1, get_vector_type (first_type.type));
+ else
+ error_at (location, "passing %qT to argument %d of %qE, but"
+ " argument %d had type %qT",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ first_argno + 1, get_vector_type (first_type));
return false;
}
size requirement, without having to refer to FIRST_TYPE. */
if (!size_ok_p && expected_bits == orig_expected_bits)
{
- error_at (location, "passing %qT to argument %d of %qE, which"
- " expects a vector of %d-bit elements",
- get_vector_type (actual_type), argno + 1, fndecl,
- expected_bits);
+ if (expected_num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of %d-bit elements",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ expected_bits);
+ else
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects vectors of %d-bit elements",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ expected_bits);
return false;
}
translation work for other type classes. */
if (!tclass_ok_p && orig_expected_tclass == TYPE_signed)
{
- error_at (location, "passing %qT to argument %d of %qE, which"
- " expects a vector of signed integers",
- get_vector_type (actual_type), argno + 1, fndecl);
+ if (expected_num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of signed integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
+ else
+ /* Translation note: could also be written "expects a tuple of
+ signed integer vectors". */
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects vectors of signed integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
return false;
}
if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned)
{
- error_at (location, "passing %qT to argument %d of %qE, which"
- " expects a vector of unsigned integers",
- get_vector_type (actual_type), argno + 1, fndecl);
+ if (expected_num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a vector of unsigned integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
+ else
+ /* Translation note: could also be written "expects a tuple of
+ unsigned integer vectors". */
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects vectors of unsigned integers",
+ get_vector_type (actual_type), argno + 1, fndecl);
return false;
}
/* If the arguments have consistent type classes, but a link between
the sizes has been broken, try to describe the error in those terms. */
- if (first_type.num_vectors == 1
- && tclass_ok_p
- && orig_expected_bits == SAME_SIZE)
+ if (tclass_ok_p && orig_expected_bits == SAME_SIZE)
{
if (argno < first_argno)
{
/* Likewise in reverse: look for cases in which the sizes are consistent
but a link between the type classes has been broken. */
- if (first_type.num_vectors == 1
- && size_ok_p
+ if (size_ok_p
&& orig_expected_tclass == SAME_TYPE_CLASS
&& first_type_suffix.integer_p
&& actual_type_suffix.integer_p)
const char *expected)
{
if (!scalar_argument_p (argno))
+ {
+ if (expected)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects %qs", get_argument_type (argno), argno + 1,
+ fndecl, expected);
+ return false;
+ }
+ return true;
+}
+
+/* Require argument ARGNO to be a nonscalar type, given that it has already
+ passed require_vector_or_scalar_type. Return true if it is, otherwise
+ report an error. This is used when two sets of instructions share the
+ same overloaded function and one accepts scalars while the other
+ doesn't. */
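+/* For example (illustrative; ACLE spellings informal): single-vector
+ svrshl_s32 accepts a scalar shift amount through its _n form, but the
+ multi-vector svrshl_s32_x2 only accepts vector shift amounts, so a
+ scalar in that position is rejected with the message below. */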
+bool
+function_resolver::require_nonscalar_type (unsigned int argno)
+{
+ if (scalar_argument_p (argno))
{
error_at (location, "passing %qT to argument %d of %qE, which"
- " expects %qs", get_argument_type (argno), argno + 1,
- fndecl, expected);
+ " does not accept scalars for this combination of arguments",
+ get_argument_type (argno), argno + 1, fndecl);
return false;
}
return true;
gcc_assert (!shape->has_merge_argument_p (*this, nops));
nargs = nops + 1;
if (!check_num_arguments (nargs)
- || !require_vector_type (i, VECTOR_TYPE_svbool_t))
+ || !require_vector_type (i, gp_type_index ()))
return false;
i += 1;
}
return resolve_to (mode_suffix_id, inferred_type);
}
+/* Finish resolving a function whose final argument can be a tuple
+ or a vector, with the function having an implicit "_single" suffix
+ in the latter case. This "_single" form might only exist for certain
+ type suffixes.
+
+ ARGNO is the index of the final argument. The inferred type suffix
+ was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE.
+ EXPECTED_TCLASS gives the expected type class for the final tuple
+ or vector.
+
+ Return the function decl of the resolved function on success,
+ otherwise report a suitable error and return error_mark_node. */
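+/* For example (illustrative; ACLE spellings informal): for svmax_s32_x2,
+ passing an svint32x2_t as the final argument resolves to the tuple
+ form, while passing a single svint32_t resolves to the _single form
+ looked up via MODE_single below. */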
+tree function_resolver::
+finish_opt_single_resolution (unsigned int argno, unsigned int first_argno,
+ sve_type first_type,
+ type_class_index expected_tclass)
+{
+ sve_type new_type = infer_sve_type (argno);
+ if (!new_type)
+ return error_mark_node;
+
+ /* If the type is a tuple, require it to match the group suffix. */
+ unsigned int num_vectors = vectors_per_tuple ();
+ if (num_vectors != 1
+ && new_type.num_vectors != 1
+ && new_type.num_vectors != num_vectors)
+ {
+ report_incorrect_num_vectors (argno, new_type, num_vectors);
+ return error_mark_node;
+ }
+
+ auto expected_num_vectors = (new_type.num_vectors == 1 ? 1 : 0);
+ if (!require_derived_vector_type (argno, first_argno, first_type,
+ expected_tclass, SAME_SIZE,
+ expected_num_vectors))
+ return error_mark_node;
+
+ if (new_type.num_vectors == 1 && first_type.num_vectors > 1)
+ {
+ if (tree single_form = lookup_form (MODE_single, first_type))
+ return single_form;
+
+ if (resolve_to (mode_suffix_id, first_type) != error_mark_node)
+ error_at (location, "passing %qT to argument %d of %qE, but its"
+ " %qT form does not accept single vectors",
+ get_vector_type (new_type), argno + 1, fndecl,
+ get_vector_type (first_type));
+ return error_mark_node;
+ }
+ return resolve_to (mode_suffix_id, first_type);
+}
+
/* Resolve a (possibly predicated) unary function. If the function uses
merge predication or if TREAT_AS_MERGE_P is true, there is an extra
vector argument before the governing predicate that specifies the
if (actual != value0 && actual != value1)
{
- report_neither_nor (location, fndecl, argno, actual, 90, 270);
+ report_neither_nor (location, fndecl, argno, actual, value0, value1);
return false;
}
insn_code
function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
{
- return ::direct_optab_handler (op, vector_mode (suffix_i));
+ return ::direct_optab_handler (op, tuple_mode (suffix_i));
}
/* Choose between signed and unsigned direct optabs SIGNED_OP and
has_float_operand_p = true;
}
- if (has_float_operand_p)
+ if (has_float_operand_p
+ && insn_data[icode].n_operands > (int) nops + 2)
{
/* Add a flag that indicates whether unpredicated instructions
are allowed. */
- CODE_FOR_SINT for signed integers
- CODE_FOR_UINT for unsigned integers
- - UNSPEC_FOR_FP for floating-point values
+ - UNSPEC_FOR_COND_FP for predicated floating-point
+ - UNSPEC_FOR_UNCOND_FP for unpredicated floating-point
and where <code_optab> is like <optab>, but uses CODE_FOR_SINT instead
of UNSPEC_FOR_FP for floating-point values.
rtx
function_expander::map_to_rtx_codes (rtx_code code_for_sint,
rtx_code code_for_uint,
- int unspec_for_fp,
+ int unspec_for_cond_fp,
+ int unspec_for_uncond_fp,
unsigned int merge_argno)
{
- machine_mode mode = vector_mode (0);
+ machine_mode mode = tuple_mode (0);
rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint);
insn_code icode;
+ if (mode_suffix_id == MODE_single)
+ {
+ gcc_assert (pred == PRED_none);
+ if (type_suffix (0).integer_p)
+ icode = code_for_aarch64_sve_single (code, mode);
+ else
+ icode = code_for_aarch64_sve_single (unspec_for_uncond_fp, mode);
+ return use_exact_insn (icode);
+ }
+
/* Handle predicate logic operations, which always use _z predication. */
if (type_suffix (0).tclass == TYPE_bool)
{
if (type_suffix (0).integer_p)
icode = maybe_code_for_aarch64_pred (code, mode);
else
- icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode);
+ icode = maybe_code_for_aarch64_pred (unspec_for_cond_fp, mode);
if (icode != CODE_FOR_nothing)
return use_pred_x_insn (icode);
}
Floating-point operations conventionally use the signed rtx code. */
if (pred == PRED_none || pred == PRED_x)
{
- icode = direct_optab_handler (code_to_optab (code), 0);
+ if (type_suffix (0).float_p && unspec_for_uncond_fp >= 0)
+ icode = maybe_code_for_aarch64_sve (unspec_for_uncond_fp, mode);
+ else
+ icode = direct_optab_handler (code_to_optab (code), 0);
if (icode == CODE_FOR_nothing)
icode = code_for_aarch64_sve (code, mode);
return use_unpred_insn (icode);
if (type_suffix (0).integer_p)
icode = code_for_cond (code, mode);
else
- icode = code_for_cond (unspec_for_fp, mode);
+ icode = code_for_cond (unspec_for_cond_fp, mode);
return use_cond_insn (icode, merge_argno);
}
function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint,
int unspec_for_fp, unsigned int merge_argno)
{
- machine_mode mode = vector_mode (0);
+ machine_mode mode = tuple_mode (0);
int unspec = (!type_suffix (0).integer_p ? unspec_for_fp
: type_suffix (0).unsigned_p ? unspec_for_uint
: unspec_for_sint);
+ if (mode_suffix_id == MODE_single)
+ {
+ gcc_assert (pred == PRED_none);
+ return use_exact_insn (code_for_aarch64_sve_single (unspec, mode));
+ }
+
if (pred == PRED_x)
{
insn_code icode = maybe_code_for_aarch64_pred (unspec, mode);
#endif
DEF_SVE_MODE (n, none, none, none)
+DEF_SVE_MODE (single, none, none, none)
DEF_SVE_MODE (index, none, none, elements)
DEF_SVE_MODE (offset, none, none, bytes)
DEF_SVE_MODE (s32index, none, svint32_t, elements)
DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode)
DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode)
DEF_SVE_TYPE_SUFFIX (c, svcount_t, count, 8, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c8, svcount_t, count, 8, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c16, svcount_t, count, 16, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c32, svcount_t, count, 32, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c64, svcount_t, count, 64, VNx16BImode)
DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode)
DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode)
DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode)
DEF_SVE_GROUP_SUFFIX (x2, 0, 2)
DEF_SVE_GROUP_SUFFIX (x3, 0, 3)
DEF_SVE_GROUP_SUFFIX (x4, 0, 4)
+DEF_SVE_GROUP_SUFFIX (vg1x2, 1, 2)
+DEF_SVE_GROUP_SUFFIX (vg1x4, 1, 4)
+DEF_SVE_GROUP_SUFFIX (vg2, 2, 2)
+DEF_SVE_GROUP_SUFFIX (vg2x1, 2, 1)
+DEF_SVE_GROUP_SUFFIX (vg2x2, 2, 2)
+DEF_SVE_GROUP_SUFFIX (vg2x4, 2, 4)
+DEF_SVE_GROUP_SUFFIX (vg4, 4, 4)
+DEF_SVE_GROUP_SUFFIX (vg4x1, 4, 1)
+DEF_SVE_GROUP_SUFFIX (vg4x2, 4, 2)
+DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4)
#include "aarch64-sve-builtins-base.def"
#include "aarch64-sve-builtins-sve2.def"
const unsigned int CP_WRITE_FFR = 1U << 6;
const unsigned int CP_READ_ZA = 1U << 7;
const unsigned int CP_WRITE_ZA = 1U << 8;
+const unsigned int CP_READ_ZT0 = 1U << 9;
+const unsigned int CP_WRITE_ZT0 = 1U << 10;
/* Enumerates the SVE predicate and (data) vector types, together called
"vector types" for brevity. */
bool modifies_global_state_p () const;
bool could_trap_p () const;
+ vector_type_index gp_type_index () const;
+ tree gp_type () const;
+
unsigned int vectors_per_tuple () const;
tree memory_scalar_type () const;
machine_mode memory_vector_mode () const;
bool scalar_argument_p (unsigned int);
void report_incorrect_num_vectors (unsigned int, sve_type, unsigned int);
+ void report_mismatched_num_vectors (unsigned int, sve_type,
+ unsigned int, sve_type);
tree report_no_such_form (sve_type);
tree lookup_form (mode_suffix_index,
type_suffix_index = NUM_TYPE_SUFFIXES,
group_suffix_index = GROUP_none);
tree resolve_to (mode_suffix_index, sve_type);
+ tree resolve_conversion (mode_suffix_index, sve_type);
+ vector_type_index infer_predicate_type (unsigned int);
type_suffix_index infer_integer_scalar_type (unsigned int);
+ type_suffix_index infer_64bit_scalar_integer_pair (unsigned int);
type_suffix_index infer_pointer_type (unsigned int, bool = false);
sve_type infer_sve_type (unsigned int);
sve_type infer_vector_or_tuple_type (unsigned int, unsigned int);
bool require_vector_or_scalar_type (unsigned int);
+ bool require_matching_predicate_type (vector_type_index, sve_type);
bool require_vector_type (unsigned int, vector_type_index);
bool require_matching_vector_type (unsigned int, unsigned int, sve_type);
bool require_derived_vector_type (unsigned int, unsigned int, sve_type,
type_class_index = SAME_TYPE_CLASS,
- unsigned int = SAME_SIZE);
+ unsigned int = SAME_SIZE,
+ unsigned int = 1);
bool require_scalar_type (unsigned int, const char *);
+ bool require_nonscalar_type (unsigned int);
bool require_pointer_type (unsigned int);
bool require_matching_integer_scalar_type (unsigned int, unsigned int,
type_suffix_index);
type_class_index = SAME_TYPE_CLASS,
unsigned int = SAME_SIZE,
type_suffix_index = NUM_TYPE_SUFFIXES);
+ tree finish_opt_single_resolution (unsigned int, unsigned int, sve_type,
+ type_class_index = SAME_TYPE_CLASS);
tree resolve ();
rtx use_contiguous_prefetch_insn (insn_code);
rtx use_contiguous_store_insn (insn_code);
- rtx map_to_rtx_codes (rtx_code, rtx_code, int,
+ rtx map_to_rtx_codes (rtx_code, rtx_code, int, int,
unsigned int = DEFAULT_MERGE_ARGNO);
rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO);
bool vector_cst_all_same (tree, unsigned int);
bool is_ptrue (tree, unsigned int);
-/* Return the ACLE type svbool_t. */
-inline tree
-get_svbool_t (void)
-{
- return acle_vector_types[0][VECTOR_TYPE_svbool_t];
-}
-
/* Try to find a mode with the given mode_suffix_info fields. Return the
mode on success or MODE_none on failure. */
inline mode_suffix_index
return !operator== (other);
}
+/* Return the index of the type that should be used as the governing
+ predicate of this function. */
+inline vector_type_index
+function_instance::gp_type_index () const
+{
+ if (group_suffix ().vectors_per_tuple > 1)
+ return VECTOR_TYPE_svcount_t;
+ return VECTOR_TYPE_svbool_t;
+}
+
+/* Return the type that should be used as the governing predicate of
+ this function. */
+inline tree
+function_instance::gp_type () const
+{
+ return acle_vector_types[0][gp_type_index ()];
+}
+
/* If the function operates on tuples of vectors, return the number
of vectors in the tuples, otherwise return 1. */
inline unsigned int
inline machine_mode
function_instance::gp_mode (unsigned int i) const
{
+ /* Multi-vector operations are predicated on an svcount_t, which has
+ mode VNx16BI. */
+ if (group_suffix ().vectors_per_tuple > 1)
+ return VNx16BImode;
return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require ();
}
;; - LD4W
;; -------------------------------------------------------------------------
-;; Predicated LD1.
+;; Predicated LD1 (single).
(define_insn "maskload<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w")
(unspec:SVE_ALL
"ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)
+;; Predicated LD1 (multi), with a count as predicate.
+(define_insn "@aarch64_ld1<mode>"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(match_operand:VNx16BI 2 "register_operand" "Uph")
+ (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+ UNSPEC_LD1_SVE_COUNT))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "ld1<Vesize>\t%0, %K2/z, %1"
+)
+
;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
[(set (match_operand:SVE_STRUCT 0 "register_operand")
;; - LDNT1W
;; -------------------------------------------------------------------------
-;; Predicated contiguous non-temporal load.
+;; Predicated contiguous non-temporal load (single).
(define_insn "@aarch64_ldnt1<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
(unspec:SVE_FULL
"ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
+;; Predicated contiguous non-temporal load (multi).
+(define_insn "@aarch64_ldnt1<mode>"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(match_operand:VNx16BI 2 "register_operand" "Uph")
+ (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+ UNSPEC_LDNT1_SVE_COUNT))]
+ "TARGET_SVE"
+ "ldnt1<Vesize>\t%0, %K2/z, %1"
+)
+
;; -------------------------------------------------------------------------
;; ---- Normal gather loads
;; -------------------------------------------------------------------------
;; - ST4W
;; -------------------------------------------------------------------------
-;; Predicated ST1.
+;; Predicated ST1 (single).
(define_insn "maskstore<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
(unspec:SVE_ALL
"st1<Vesize>\t%1.<Vctype>, %2, %0"
)
+(define_insn "@aarch64_st1<mode>"
+ [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
+ (unspec:SVE_FULLx24
+ [(match_operand:VNx16BI 2 "register_operand" "Uph")
+ (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE_COUNT))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "st1<Vesize>\t%1, %K2, %0"
+)
+
;; Unpredicated ST[234]. This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
"stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)
+(define_insn "@aarch64_stnt1<mode>"
+ [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
+ (unspec:SVE_FULLx24
+ [(match_operand:VNx16BI 2 "register_operand" "Uph")
+ (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_dup 0)]
+ UNSPEC_STNT1_SVE_COUNT))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "stnt1<Vesize>\t%1, %K2, %0"
+)
+
;; -------------------------------------------------------------------------
;; ---- Normal scatter stores
;; -------------------------------------------------------------------------
)
;; Four-element integer dot-product by selected lanes with accumulation.
-(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
+(define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(plus:SVE_FULL_SDI
(unspec:SVE_FULL_SDI
- [(match_operand:<VSI2QI> 1 "register_operand")
- (unspec:<VSI2QI>
- [(match_operand:<VSI2QI> 2 "register_operand")
+ [(match_operand:SVE_FULL_BHI 1 "register_operand")
+ (unspec:SVE_FULL_BHI
+ [(match_operand:SVE_FULL_BHI 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
DOTPROD)
(match_operand:SVE_FULL_SDI 4 "register_operand")))]
- "TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
- [ w , w , <sve_lane_con> , 0 ; * ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
- [ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
+ "TARGET_SVE
+ && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
+ || (TARGET_STREAMING_SME2
+ && <SVE_FULL_SDI:elem_bits> == 32
+ && <SVE_FULL_BHI:elem_bits> == 16))"
+ {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
+ [ w , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; * ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
+ [ ?&w , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
}
)
}
)
-(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
+(define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
(plus:VNx4SI_ONLY
(unspec:VNx4SI_ONLY
- [(match_operand:<VSI2QI> 1 "register_operand")
- (unspec:<VSI2QI>
- [(match_operand:<VSI2QI> 2 "register_operand")
+ [(match_operand:VNx16QI_ONLY 1 "register_operand")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16QI_ONLY 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
DOTPROD_I8MM)
;; - BFDOT (BF16)
;; - BFMLALB (BF16)
;; - BFMLALT (BF16)
+;; - BFMLSLB (SME2)
+;; - BFMLSLT (SME2)
;; - BFMMLA (BF16)
;; -------------------------------------------------------------------------
;; - WHILEWR (SVE2)
;; -------------------------------------------------------------------------
+(define_constants [
+ (SVE_WHILE_B 0)
+ (SVE_WHILE_B_X2 1)
+ (SVE_WHILE_C 2)
+])
+
;; Set element I of the result if (cmp (plus operand1 J) operand2) is
;; true for all J in [0, I].
(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (unspec:PRED_ALL [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
SVE_WHILE))
(clobber (reg:CC_NZC CC_REGNUM))]
(match_operand 4)
(const_int SVE_KNOWN_PTRUE)
(unspec:PRED_ALL
- [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
SVE_WHILE)]
UNSPEC_PTEST))
(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_dup 1)
+ (unspec:PRED_ALL [(const_int SVE_WHILE_B)
+ (match_dup 1)
(match_dup 2)]
SVE_WHILE))]
"TARGET_SVE"
(match_operand 4)
(const_int SVE_KNOWN_PTRUE)
(unspec:PRED_ALL
- [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
SVE_WHILE)]
UNSPEC_PTEST))
;; ---- Non-temporal gather loads
;; ---- Non-temporal scatter stores
;;
+;; == Predicate manipulation
+;; ---- [PRED] Predicate-as-counter PTRUE
+;; ---- [PRED] Predicate extraction
+;; ---- [PRED] Predicate selection
+;; ---- [PRED] Predicate count
+;;
+;; == Uniform unary arithmetic
+;; ---- [FP] Multi-register unary operations
+;;
;; == Uniform binary arithmetic
+;; ---- [INT] Multi-register operations
+;; ---- [INT] Clamp to minimum/maximum
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
+;; ---- [FP] Clamp to minimum/maximum
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
+;; ---- [INT] Multi-register widening conversions
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
+;; ---- [FP] Multi-register operations
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
+;; ---- [INT] Multi-vector narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
+;; ---- [INT] Multi-vector narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
+;; ---- [FP<-FP] Multi-vector narrowing conversions
+;; ---- [FP<-INT] Multi-vector conversions
+;; ---- [INT<-FP] Multi-vector conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
+;; == Comparisons and selects
+;; ---- [INT,FP] Select based on predicates as counters
+;; ---- [INT] While tests
+;;
;; == Permutation
+;; ---- [INT,FP] Reversal
;; ---- [INT,FP] General permutes
+;; ---- [INT,FP] Multi-register permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
}
)
+;; =========================================================================
+;; == Predicate manipulation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate-as-counter PTRUE
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PTRUE (predicate-as-counter form)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
+ (unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
+ "TARGET_STREAMING_SME2"
+ "ptrue\t%K0.<bits_etype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate extraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PEXT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_pext<BHSD_BITS>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Uph")
+ (match_operand:DI 2 "const_int_operand")
+ (const_int BHSD_BITS)]
+ UNSPEC_PEXT))]
+ "TARGET_STREAMING_SME2"
+ "pext\t%0.<bits_etype>, %K1[%2]"
+)
+
+(define_insn "@aarch64_sve_pext<BHSD_BITS>x2"
+ [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
+ (unspec:VNx32BI
+ [(match_operand:VNx16BI 1 "register_operand" "Uph")
+ (match_operand:DI 2 "const_int_operand")
+ (const_int BHSD_BITS)]
+ UNSPEC_PEXTx2))]
+ "TARGET_STREAMING_SME2"
+ "pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate selection
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PSEL
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_psel<BHSD_BITS>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:VNx16BI 2 "register_operand" "Upa")
+ (match_operand:SI 3 "register_operand" "Ucj")
+ (const_int BHSD_BITS)]
+ UNSPEC_PSEL))]
+ "TARGET_STREAMING_SME2"
+ "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
+)
+
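+;; The variant below folds a constant offset into the element index.  The
+;; offset must be an element index within a single 128-bit segment
+;; (0-15 for .b, 0-7 for .h, 0-3 for .s, 0-1 for .d), which is what the
+;; 128 / <BHSD_BITS> bound enforces.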
+(define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:VNx16BI 2 "register_operand" "Upa")
+ (plus:SI
+ (match_operand:SI 3 "register_operand" "Ucj")
+ (match_operand:SI 4 "const_int_operand"))
+ (const_int BHSD_BITS)]
+ UNSPEC_PSEL))]
+ "TARGET_STREAMING_SME2
+ && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
+ "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate count
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CNTP (predicate as counter)
+;; -------------------------------------------------------------------------
+
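+;; Operand 2 is the vector group size (2 or 4); it is printed verbatim
+;; as the vlx<N> specifier.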
+(define_insn "@aarch64_sve_cntp_c<BHSD_BITS>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:DI 2 "const_int_operand")
+ (const_int BHSD_BITS)]
+ UNSPEC_CNTP_C))]
+ "TARGET_STREAMING_SME2"
+ "cntp\t%x0, %K1.<bits_etype>, vlx%2"
+)
+
+;; =========================================================================
+;; == Uniform unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multi-register unary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; -------------------------------------------------------------------------
+
+(define_insn "<frint_pattern><mode>2"
+ [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_SFx24
+ [(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+ SVE2_SFx24_UNARY))]
+ "TARGET_STREAMING_SME2"
+ "frint<frint_suffix>\t%0, %1"
+)
+
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register operations
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - ADD
+;; - SMAX
+;; - SMIN
+;; - SQMULH
+;; - SRSHL
+;; - UMAX
+;; - UMIN
+;; - URSHL
+;; -------------------------------------------------------------------------
+
+(define_expand "<optab><mode>3"
+ [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (SVE_INT_BINARY_MULTI:SVE_Ix24
+ (match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+ "TARGET_STREAMING_SME2"
+)
+
+(define_insn "*<optab><mode>3"
+ [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (SVE_INT_BINARY_MULTI:SVE_Ix24
+ (match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+ (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+ "TARGET_STREAMING_SME2"
+ "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<optab><mode>"
+ [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (SVE_INT_BINARY_SINGLE:SVE_Ix24
+ (match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
+ (vec_duplicate:SVE_Ix24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))))]
+ "TARGET_STREAMING_SME2"
+ "<sve_int_op>\t%0, %0, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Ix24
+ [(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+ (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_INT_BINARY_MULTI))]
+ "TARGET_STREAMING_SME2"
+ "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<sve_int_op><mode>"
+ [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Ix24
+ [(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
+ (vec_duplicate:SVE_Ix24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SVE_INT_BINARY_MULTI))]
+ "TARGET_STREAMING_SME2"
+ "<sve_int_op>\t%0, %0, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Clamp to minimum/maximum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SCLAMP
+;; - UCLAMP
+;; -------------------------------------------------------------------------
+
+;; The minimum is applied after the maximum, which matters if the maximum
+;; bound is (unexpectedly) less than the minimum bound.
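+;; For example, if the maximum bound is 10 and the minimum bound is 20,
+;; min (max (x, 20), 10) is 10 for every input x, so the result is pinned
+;; to the maximum bound rather than being left unclamped.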
+(define_insn "@aarch64_sve_<su>clamp<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (<max_opp>:SVE_FULL_I
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 3 "register_operand")))]
+ "TARGET_STREAMING_SME"
+ {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+ [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ }
+)
+
+(define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (<max_opp>:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 3 "register_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_STREAMING_SME"
+ {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+ [ w, %0, w, w; * ] #
+ [ ?&w, w, w, w; yes ] #
+ }
+ "&& true"
+ [(set (match_dup 0)
+ (<max_opp>:SVE_FULL_I
+ (USMAX:SVE_FULL_I
+ (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))]
+)
+
+(define_insn "@aarch64_sve_<su>clamp_single<mode>"
+ [(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw<vector_count>")
+ (<max_opp>:SVE_Ix24
+ (USMAX:SVE_Ix24
+ (match_operand:SVE_Ix24 1 "register_operand" "0")
+ (vec_duplicate:SVE_Ix24
+ (match_operand:<VSINGLE> 2 "register_operand" "w")))
+ (vec_duplicate:SVE_Ix24
+ (match_operand:<VSINGLE> 3 "register_operand" "w"))))]
+ "TARGET_STREAMING_SME2"
+ "<su>clamp\t%0, %2.<Vetype>, %3.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
[(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Clamp to minimum/maximum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCLAMP
+;; -------------------------------------------------------------------------
+
+;; The minimum is applied after the maximum, which matters if the maximum
+;; bound is (unexpectedly) less than the minimum bound.
+(define_insn "@aarch64_sve_fclamp<mode>"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand")
+ (unspec:SVE_FULL_F
+ [(unspec:SVE_FULL_F
+ [(match_operand:SVE_FULL_F 1 "register_operand")
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ UNSPEC_FMAXNM)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_FMINNM))]
+ "TARGET_STREAMING_SME"
+ {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+ [ w, %0, w, w; * ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ }
+)
+
+(define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand")
+ (unspec:SVE_FULL_F
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (unspec:SVE_FULL_F
+ [(match_operand 5)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 1 "register_operand")
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ UNSPEC_COND_FMAXNM)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_COND_FMINNM))]
+ "TARGET_STREAMING_SME"
+ {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+ [ w, %0, w, w; * ] #
+ [ ?&w, w, w, w; yes ] #
+ }
+ "&& true"
+ [(set (match_dup 0)
+ (unspec:SVE_FULL_F
+ [(unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_FMAXNM)
+ (match_dup 3)]
+ UNSPEC_FMINNM))]
+)
+
+(define_insn "@aarch64_sve_fclamp_single<mode>"
+ [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24
+ [(unspec:SVE_Fx24
+ [(match_operand:SVE_Fx24 1 "register_operand" "0")
+ (vec_duplicate:SVE_Fx24
+ (match_operand:<VSINGLE> 2 "register_operand" "w"))]
+ UNSPEC_FMAXNM)
+ (vec_duplicate:SVE_Fx24
+ (match_operand:<VSINGLE> 3 "register_operand" "w"))]
+ UNSPEC_FMINNM))]
+ "TARGET_STREAMING_SME2"
+ "fclamp\t%0, %2.<Vetype>, %3.<Vetype>"
+)
+
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================
;; == Extending arithmetic
;; =========================================================================
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register widening conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPK
+;; - UUNPK
+;; -------------------------------------------------------------------------
+
+(define_insn "<optab><mode><v2xwide>2"
+ [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw2")
+ (ANY_EXTEND:<V2XWIDE>
+ (match_operand:SVE_FULL_BHSI 1 "register_operand" "w")))]
+ "TARGET_STREAMING_SME2"
+ "<su>unpk\t%0, %1.<Vetype>"
+)
+
+(define_insn "<optab><mode><v2xwide>2"
+ [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw4")
+ (ANY_EXTEND:<V2XWIDE>
+ (match_operand:SVE_FULL_BHSIx2 1 "aligned_register_operand" "Uw2")))]
+ "TARGET_STREAMING_SME2"
+ "<su>unpk\t%0, %1"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SABALB
;; - SABALT
+;; - SDOT (SME2 or SVE2p1)
;; - SMLALB
;; - SMLALT
;; - SMLSLB
;; - SQDMLSLT
;; - UABALB
;; - UABALT
+;; - UDOT (SME2 or SVE2p1)
;; - UMLALB
;; - UMLALT
;; - UMLSLB
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
}
)
+
+;; Two-way dot-product.
+(define_insn "@aarch64_sve_<sur>dotvnx4sivnx8hi"
+ [(set (match_operand:VNx4SI 0 "register_operand")
+ (plus:VNx4SI
+ (unspec:VNx4SI
+ [(match_operand:VNx8HI 1 "register_operand")
+ (match_operand:VNx8HI 2 "register_operand")]
+ DOTPROD)
+ (match_operand:VNx4SI 3 "register_operand")))]
+ "TARGET_STREAMING_SME2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.h, %2.h
+ [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multi-register operations
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - FMAX
+;; - FMAXNM
+;; - FMIN
+;; - FMINNM
+;; -------------------------------------------------------------------------
+
+(define_expand "@aarch64_sve_<maxmin_uns_op><mode>"
+ [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24
+ [(match_operand:SVE_Fx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_FP_BINARY_MULTI))]
+ "TARGET_STREAMING_SME2"
+)
+
+(define_insn "*aarch64_sve_<maxmin_uns_op><mode>"
+ [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24
+ [(match_operand:SVE_Fx24 1 "aligned_register_operand" "%0")
+ (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_FP_BINARY_MULTI))]
+ "TARGET_STREAMING_SME2"
+ "<maxmin_uns_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<maxmin_uns_op><mode>"
+ [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24
+ [(match_operand:SVE_Fx24 1 "aligned_register_operand" "0")
+ (vec_duplicate:SVE_Fx24
+ (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+ SVE_FP_BINARY_MULTI))]
+ "TARGET_STREAMING_SME2"
+ "<maxmin_uns_op>\t%0, %0, %2.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; Includes:
+;; - FDOT (SME2 or SVE2p1)
;; - FMLALB
;; - FMLALT
;; - FMLSLB
}
)
+;; Two-way dot-product.
+(define_insn "aarch64_sve_fdotvnx4sfvnx8hf"
+ [(set (match_operand:VNx4SF 0 "register_operand")
+ (plus:VNx4SF
+ (unspec:VNx4SF
+ [(match_operand:VNx8HF 1 "register_operand")
+ (match_operand:VNx8HF 2 "register_operand")]
+ UNSPEC_FDOT)
+ (match_operand:VNx4SF 3 "register_operand")))]
+ "TARGET_STREAMING_SME2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , w , w , 0 ; * ] fdot\t%0.s, %1.h, %2.h
+ [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
+ }
+)
+
+(define_insn "aarch64_fdot_prod_lanevnx4sfvnx8hf"
+ [(set (match_operand:VNx4SF 0 "register_operand")
+ (plus:VNx4SF
+ (unspec:VNx4SF
+ [(match_operand:VNx8HF 1 "register_operand")
+ (unspec:VNx8HF
+ [(match_operand:VNx8HF 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ UNSPEC_FDOT)
+ (match_operand:VNx4SF 4 "register_operand")))]
+ "TARGET_STREAMING_SME2"
+ {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
+ [ w , w , y , 0 ; * ] fdot\t%0.s, %1.h, %2.h[%3]
+ [ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
+ }
+)
+
;; =========================================================================
;; == Narrowing arithmetic
;; =========================================================================
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-vector narrowing unary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQCVT
+;; - SQCVTN
+;; - SQCVTU
+;; - SQCVTUN
+;; - UQCVT
+;; - UQCVTN
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>"
+ [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_QCVTxN))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "<optab>\t%0.b, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>"
+ [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx8HI_ONLY
+ [(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_QCVTxN))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "<optab>\t%0.h, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>"
+ [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx8HI_ONLY
+ [(match_operand:VNx8DI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+ SVE_QCVTxN))]
+ "TARGET_SME2 && TARGET_STREAMING"
+ "<optab>\t%0.h, %1"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-vector narrowing right shifts
+;; -------------------------------------------------------------------------
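+;; Includes:
+;; - SQRSHR
+;; - SQRSHRN
+;; - SQRSHRU
+;; - SQRSHRUN
+;; - UQRSHR
+;; - UQRSHRN
+;; -------------------------------------------------------------------------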
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
+ (match_operand:DI 2 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_NARROWxN))]
+ "TARGET_STREAMING_SME2"
+ "<sve_int_op>\t%0.<Ventype>, %1, #%2"
+)
+
;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================
"fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Multi-vector narrowing conversions
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - BFCVT
+;; - BFCVTN
+;; - FCVT
+;; - FCVTN
+;; -------------------------------------------------------------------------
+
+(define_insn "truncvnx8sf<mode>2"
+ [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
+ (float_truncate:SVE_FULL_HF
+ (match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")))]
+ "TARGET_STREAMING_SME2"
+ "<b>fcvt\t%0.h, %1"
+)
+
+(define_insn "@aarch64_sve_cvtn<mode>"
+ [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HF
+ [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")]
+ UNSPEC_FCVTN))]
+ "TARGET_STREAMING_SME2"
+ "<b>fcvtn\t%0.h, %1"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Multi-vector conversions
+;; -------------------------------------------------------------------------
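+;; Includes the multi-register forms of:
+;; - SCVTF
+;; - UCVTF
+;; -------------------------------------------------------------------------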
+
+(define_insn "<optab><v_int_equiv><mode>2"
+ [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
+ (FLOATUORS:SVE_SFx24
+ (match_operand:<V_INT_EQUIV> 1 "aligned_register_operand" "Uw<vector_count>")))]
+ "TARGET_STREAMING_SME2"
+ "<su_optab>cvtf\t%0, %1"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Multi-vector conversions
+;; -------------------------------------------------------------------------
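+;; Includes the multi-register forms of:
+;; - FCVTZS
+;; - FCVTZU
+;; -------------------------------------------------------------------------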
+
+(define_insn "<optab><mode><v_int_equiv>2"
+ [(set (match_operand:<V_INT_EQUIV> 0 "aligned_register_operand" "=Uw<vector_count>")
+ (FIXUORS:<V_INT_EQUIV>
+ (match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")))]
+ "TARGET_STREAMING_SME2"
+ "fcvtz<su>\t%0, %1"
+)
+
;; =========================================================================
;; == Other arithmetic
;; =========================================================================
"<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on predicates as counters
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_sel<mode>"
+ [(set (match_operand:SVE_FULLx24 0 "register_operand" "=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(match_operand:<VPRED> 3 "register_operand" "Uph")
+ (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+ (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
+ UNSPEC_SEL))]
+ "TARGET_STREAMING_SME2"
+ "sel\t%0, %K3, %1, %2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] While tests
+;; -------------------------------------------------------------------------
+;; Includes the x2 and count versions of:
+;; - WHILEGE
+;; - WHILEGT
+;; - WHILEHI
+;; - WHILEHS
+;; - WHILELE
+;; - WHILELO
+;; - WHILELS
+;; - WHILELT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2"
+ [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
+ (unspec:VNx32BI
+ [(const_int SVE_WHILE_B_X2)
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+ (const_int BHSD_BITS)]
+ SVE_WHILE_ORDER))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_STREAMING_SME2"
+ "while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
+)
+
+(define_insn "@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
+ (unspec:VNx16BI
+ [(const_int SVE_WHILE_C)
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+ (const_int BHSD_BITS)
+ (match_operand:DI 3 "const_int_operand")]
+ SVE_WHILE_ORDER))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_STREAMING_SME2"
+ "while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
+)
+
;; =========================================================================
;; == Permutation
;; =========================================================================
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Reversal
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - REVD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<optab><mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand")
+ (unspec:SVE_FULL
+ [(match_operand:VNx2BI 1 "register_operand")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 2 "register_operand")]
+ UNSPEC_REVD_ONLY)]
+ UNSPEC_PRED_X))]
+ "TARGET_STREAMING_SME"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] revd\t%0.q, %1/m, %2.q
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
+ }
+)
+
+(define_insn "@cond_<optab><mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand")
+ (unspec:SVE_FULL
+ [(match_operand:VNx2BI 1 "register_operand")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 2 "register_operand")]
+ UNSPEC_REVD_ONLY)
+ (match_operand:SVE_FULL 3 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_STREAMING_SME"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , w , 0 ; * ] revd\t%0.q, %1/m, %2.q
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
"tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Multi-register permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ZIP
+;; - UZP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
+ (unspec:SVE_FULLx2
+ [(match_operand:<VSINGLE> 1 "register_operand" "w")
+ (match_operand:<VSINGLE> 2 "register_operand" "w")]
+ SVE2_x24_PERMUTE))]
+ "TARGET_STREAMING_SME2"
+ "<perm_insn>\t%0, %1.<Vetype>, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
+ (unspec:SVE_FULLx2
+ [(match_operand:<VSINGLE> 1 "register_operand" "w")
+ (match_operand:<VSINGLE> 2 "register_operand" "w")]
+ SVE2_x24_PERMUTEQ))]
+ "TARGET_STREAMING_SME2"
+ "<perm_insn>\t{%S0.q - %T0.q}, %1.q, %2.q"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
+ (unspec:SVE_FULLx4
+ [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
+ SVE2_x24_PERMUTE))]
+ "TARGET_STREAMING_SME2"
+ "<perm_insn>\t%0, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
+ (unspec:SVE_FULLx4
+ [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
+ SVE2_x24_PERMUTEQ))]
+ "TARGET_STREAMING_SME2"
+ "<perm_insn>\t{%S0.q - %V0.q}, {%S1.q - %V1.q}"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
'0': Print a normal operand, if it's a general register,
then we assume DImode.
'k': Print NZCV for conditional compare instructions.
+ 'K': Print a predicate register as pn<N> rather than p<N>.
'A': Output address constant representing the first
argument of X, specifying a relocation offset
if appropriate.
case 'T':
case 'U':
case 'V':
- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
+ if (!REG_P (x) || (!FP_REGNUM_P (REGNO (x)) && !PR_REGNUM_P (REGNO (x))))
{
- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
+ output_operand_lossage ("incompatible operand for '%%%c'", code);
return;
}
- asm_fprintf (f, "%c%d",
- aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
- REGNO (x) - V0_REGNUM + (code - 'S'));
+ if (PR_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "p%d", REGNO (x) - P0_REGNUM + (code - 'S'));
+ else
+ asm_fprintf (f, "%c%d",
+ aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
+ REGNO (x) - V0_REGNUM + (code - 'S'));
break;
case 'R':
}
break;
+ case 'K':
+ if (!REG_P (x) || !PR_REGNUM_P (REGNO (x)))
+ {
+ output_operand_lossage ("invalid operand for '%%%c'", code);
+ return;
+ }
+ asm_fprintf (f, "pn%d", REGNO (x) - P0_REGNUM);
+ break;
+
case 'y':
case 'z':
{
enum reg_class
aarch64_regno_regclass (unsigned regno)
{
+ if (W8_W11_REGNUM_P (regno))
+ return W8_W11_REGS;
+
if (W12_W15_REGNUM_P (regno))
return W12_W15_REGS;
unsigned int nregs, vec_flags;
switch (regclass)
{
+ case W8_W11_REGS:
case W12_W15_REGS:
case STUB_REGS:
case TAILCALL_ADDR_REGS:
imply anything about the state of PSTATE.SM. */
#define TARGET_SME (AARCH64_ISA_SME)
-/* Streaming-mode SME instructions. */
+/* Same with streaming mode enabled. */
#define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME)
/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */
/* SME2 instructions, enabled through +sme2. */
#define TARGET_SME2 (AARCH64_ISA_SME2)
+/* Same with streaming mode enabled. */
+#define TARGET_STREAMING_SME2 (TARGET_STREAMING && TARGET_SME2)
+
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
{"b" # N, V0_REGNUM + (N)}, \
{"z" # N, V0_REGNUM + (N)}
+#define P_ALIASES(N) {"pn" # N, P0_REGNUM + (N)}
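+/* The "pn" aliases allow the SME2 predicate-as-counter registers to be
+   referred to directly, for example in inline-asm clobber lists, as an
+   alternative to the corresponding "p" names.  */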
+
/* Provide aliases for all of the ISA defined register name forms.
These aliases are convenient for use in the clobber lists of inline
asm statements. */
V_ALIASES(16), V_ALIASES(17), V_ALIASES(18), V_ALIASES(19), \
V_ALIASES(20), V_ALIASES(21), V_ALIASES(22), V_ALIASES(23), \
V_ALIASES(24), V_ALIASES(25), V_ALIASES(26), V_ALIASES(27), \
- V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \
+ V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31), \
+ P_ALIASES(0), P_ALIASES(1), P_ALIASES(2), P_ALIASES(3), \
+ P_ALIASES(4), P_ALIASES(5), P_ALIASES(6), P_ALIASES(7), \
+ P_ALIASES(8), P_ALIASES(9), P_ALIASES(10), P_ALIASES(11), \
+ P_ALIASES(12), P_ALIASES(13), P_ALIASES(14), P_ALIASES(15) \
}
#define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO))
&& (REGNO) != R17_REGNUM \
&& (REGNO) != R30_REGNUM) \
+#define W8_W11_REGNUM_P(REGNO) \
+ IN_RANGE (REGNO, R8_REGNUM, R11_REGNUM)
+
#define W12_W15_REGNUM_P(REGNO) \
IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM)
enum reg_class
{
NO_REGS,
+ W8_W11_REGS,
W12_W15_REGS,
TAILCALL_ADDR_REGS,
STUB_REGS,
#define REG_CLASS_NAMES \
{ \
"NO_REGS", \
+ "W8_W11_REGS", \
"W12_W15_REGS", \
"TAILCALL_ADDR_REGS", \
"STUB_REGS", \
#define REG_CLASS_CONTENTS \
{ \
{ 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00000f00, 0x00000000, 0x00000000 }, /* W8_W11_REGS */ \
{ 0x0000f000, 0x00000000, 0x00000000 }, /* W12_W15_REGS */ \
{ 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\
{ 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \
UNSPEC_NZCV
UNSPEC_XPACLRI
UNSPEC_LD1_SVE
+ UNSPEC_LD1_SVE_COUNT
UNSPEC_ST1_SVE
+ UNSPEC_ST1_SVE_COUNT
UNSPEC_LDNT1_SVE
+ UNSPEC_LDNT1_SVE_COUNT
UNSPEC_STNT1_SVE
+ UNSPEC_STNT1_SVE_COUNT
UNSPEC_LD1RQ
UNSPEC_LD1_GATHER
UNSPEC_LDFF1_GATHER
(define_mode_attr cas_short_expected_pred
[(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
(define_mode_attr cas_short_expected_imm
- [(QI "n") (HI "Uph")])
+ [(QI "n") (HI "Uih")])
(define_insn_and_split "@aarch64_compare_and_swap<mode>"
[(set (reg:CC CC_REGNUM) ;; bool out
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")
+(define_register_constraint "Uci" "W8_W11_REGS"
+ "@internal r8-r11, which can be used to index ZA.")
+
(define_register_constraint "Ucj" "W12_W15_REGS"
"@internal r12-r15, which can be used to index ZA.")
(define_register_constraint "w" "FP_REGS"
"Floating point and SIMD vector registers.")
+(define_register_constraint "x" "FP_LO_REGS"
+ "Floating point and SIMD vector registers V0 - V15.")
+
+(define_register_constraint "y" "FP_LO8_REGS"
+ "Floating point and SIMD vector registers V0 - V7.")
+
+(define_register_constraint "Uw2" "FP_REGS"
+ "Even floating point and SIMD vector registers."
+ "regno % 2 == 0")
+
+(define_register_constraint "Uw4" "FP_REGS"
+ "4-tuple-aligned floating point and SIMD vector registers."
+ "regno % 4 == 0")
+
(define_register_constraint "Upa" "PR_REGS"
"SVE predicate registers p0 - p15.")
(define_register_constraint "Upl" "PR_LO_REGS"
"SVE predicate registers p0 - p7.")
-(define_register_constraint "x" "FP_LO_REGS"
- "Floating point and SIMD vector registers V0 - V15.")
-
-(define_register_constraint "y" "FP_LO8_REGS"
- "Floating point and SIMD vector registers V0 - V7.")
+(define_register_constraint "Uph" "PR_HI_REGS"
+ "SVE predicate registers p8 - p15.")
(define_constraint "c"
"@internal The condition code register."
(and (match_code "const_int")
(match_test "(unsigned) exact_log2 (ival) <= 4")))
-(define_constraint "Uph"
+(define_constraint "Uih"
"@internal
A constraint that matches HImode integers zero extendable to
SImode plus_operand."
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
+(define_mode_iterator VNx16SI_ONLY [VNx16SI])
(define_mode_iterator VNx8HI_ONLY [VNx8HI])
(define_mode_iterator VNx8BF_ONLY [VNx8BF])
+(define_mode_iterator VNx8SI_ONLY [VNx8SI])
+(define_mode_iterator VNx8DI_ONLY [VNx8DI])
(define_mode_iterator VNx4SI_ONLY [VNx4SI])
(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
;; elements.
(define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI])
+;; Pairs of the above.
+(define_mode_iterator SVE_FULL_BHSIx2 [VNx32QI VNx16HI VNx8SI])
+
+;; Fully-packed SVE vector modes that have 16-bit float elements.
+(define_mode_iterator SVE_FULL_HF [VNx8BF VNx8HF])
+
;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI
VNx8BF VNx8HF VNx4SF VNx2DF])
;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
+;; 2x and 4x tuples of the above, excluding 2x DI.
+(define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
+
;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit
;; elements.
(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF])
(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM")
(VNx2DF "TARGET_SVE_F64MM")])
+;; Fully-packed SVE vector modes that have 32-bit or smaller elements.
+(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
+ VNx8BF VNx8HF VNx4SF])
+
;; Fully-packed SVE vector modes that have 32-bit elements.
(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
(define_mode_iterator SVE_FULLx4 [VNx64QI VNx32HI VNx16SI VNx8DI
VNx32BF VNx32HF VNx16SF VNx8DF])
+(define_mode_iterator SVE_FULLx24 [SVE_FULLx2 SVE_FULLx4])
+
;; All SVE vector structure modes.
(define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4])
VNx4SI VNx2SI
VNx2DI])
+(define_mode_iterator SVE_DIx24 [VNx4DI VNx8DI])
+
;; SVE modes with 2 or 4 elements.
(define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2BF VNx2SI VNx2SF
VNx2DI VNx2DF
;; Bfloat16 modes to which V4SF can be converted
(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+(define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI
+ VNx16BF VNx16HF VNx8SF
+ VNx64QI VNx32HI VNx16SI
+ VNx32BF VNx32HF VNx16SF])
+
+(define_mode_iterator SVE_Ix24 [VNx32QI VNx16HI VNx8SI VNx4DI
+ VNx64QI VNx32HI VNx16SI VNx8DI])
+
+(define_mode_iterator SVE_Fx24 [VNx16HF VNx8SF VNx4DF
+ VNx32HF VNx16SF VNx8DF])
+
+(define_mode_iterator SVE_SFx24 [VNx8SF VNx16SF])
+
;; The modes used to represent different ZA access sizes.
(define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI])
(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")])
(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")])
+(define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI])
+
+(define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI
+ VNx8HI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_BHIx24 [VNx32QI VNx64QI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_HFx124 [VNx8BF VNx16BF VNx32BF
+ VNx8HF VNx16HF VNx32HF])
+
+(define_mode_iterator SME_ZA_HFx24 [VNx16BF VNx32BF VNx16HF VNx32HF])
+
+(define_mode_iterator SME_ZA_HIx124 [VNx8HI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_HIx24 [VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_SDIx24 [VNx8SI (VNx4DI "TARGET_SME_I16I64")
+ VNx16SI (VNx8DI "TARGET_SME_I16I64")])
+
+(define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64")
+ VNx16SF (VNx8DF "TARGET_SME_F64F64")])
+
;; The modes for which outer product instructions are supported.
(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF
UNSPEC_IORF ; Used in aarch64-sve.md.
UNSPEC_XORF ; Used in aarch64-sve.md.
UNSPEC_REVB ; Used in aarch64-sve.md.
+ UNSPEC_REVD ; Used in aarch64-sve2.md.
UNSPEC_REVH ; Used in aarch64-sve.md.
UNSPEC_REVW ; Used in aarch64-sve.md.
UNSPEC_REVBHW ; Used in aarch64-sve.md.
UNSPEC_CMLA180_CONJ ; Used in aarch64-sve2.md.
UNSPEC_CMUL ; Used in aarch64-sve2.md.
UNSPEC_CMUL_CONJ ; Used in aarch64-sve2.md.
+ UNSPEC_CNTP_C ; Used in aarch64-sve2.md.
UNSPEC_COND_FCVTLT ; Used in aarch64-sve2.md.
UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md.
UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md.
UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
UNSPEC_MATCH ; Used in aarch64-sve2.md.
UNSPEC_NMATCH ; Used in aarch64-sve2.md.
+ UNSPEC_PEXT ; Used in aarch64-sve2.md.
+ UNSPEC_PEXTx2 ; Used in aarch64-sve2.md.
UNSPEC_PMULLB ; Used in aarch64-sve2.md.
UNSPEC_PMULLB_PAIR ; Used in aarch64-sve2.md.
UNSPEC_PMULLT ; Used in aarch64-sve2.md.
UNSPEC_PMULLT_PAIR ; Used in aarch64-sve2.md.
+ UNSPEC_PSEL ; Used in aarch64-sve2.md.
+ UNSPEC_PTRUE_C ; Used in aarch64-sve2.md.
UNSPEC_RADDHNB ; Used in aarch64-sve2.md.
UNSPEC_RADDHNT ; Used in aarch64-sve2.md.
UNSPEC_RSHRNB ; Used in aarch64-sve2.md.
UNSPEC_SQRDCMLAH180 ; Used in aarch64-sve2.md.
UNSPEC_SQRDCMLAH270 ; Used in aarch64-sve2.md.
UNSPEC_SQRDCMLAH90 ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHR ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRN ; Used in aarch64-sve2.md.
UNSPEC_SQRSHRNB ; Used in aarch64-sve2.md.
UNSPEC_SQRSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRU ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRUN ; Used in aarch64-sve2.md.
UNSPEC_SQRSHRUNB ; Used in aarch64-sve2.md.
UNSPEC_SQRSHRUNT ; Used in aarch64-sve2.md.
UNSPEC_SQSHRNB ; Used in aarch64-sve2.md.
UNSPEC_UMULHS ; Used in aarch64-sve2.md.
UNSPEC_UMULLB ; Used in aarch64-sve2.md.
UNSPEC_UMULLT ; Used in aarch64-sve2.md.
+ UNSPEC_UQRSHR ; Used in aarch64-sve2.md.
+ UNSPEC_UQRSHRN ; Used in aarch64-sve2.md.
UNSPEC_UQRSHRNB ; Used in aarch64-sve2.md.
UNSPEC_UQRSHRNT ; Used in aarch64-sve2.md.
UNSPEC_UQSHRNB ; Used in aarch64-sve2.md.
UNSPEC_USUBWB ; Used in aarch64-sve2.md.
UNSPEC_USUBWT ; Used in aarch64-sve2.md.
UNSPEC_USDOT ; Used in aarch64-simd.md.
+ UNSPEC_UZP ; Used in aarch64-sve2.md.
+ UNSPEC_UZPQ ; Used in aarch64-sve2.md.
+ UNSPEC_ZIP ; Used in aarch64-sve2.md.
+ UNSPEC_ZIPQ ; Used in aarch64-sve2.md.
UNSPEC_SUDOT ; Used in aarch64-simd.md.
UNSPEC_BFDOT ; Used in aarch64-simd.md.
UNSPEC_BFMLALB ; Used in aarch64-sve.md.
UNSPEC_BFMLALT ; Used in aarch64-sve.md.
+ UNSPEC_BFMLSLB ; Used in aarch64-sve.md.
+ UNSPEC_BFMLSLT ; Used in aarch64-sve.md.
UNSPEC_BFMMLA ; Used in aarch64-sve.md.
UNSPEC_BFCVTN ; Used in aarch64-simd.md.
UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
UNSPEC_BFCVT ; Used in aarch64-simd.md.
UNSPEC_FCVTXN ; Used in aarch64-simd.md.
+ ;; All used in aarch64-sve2.md
+ UNSPEC_FCVTN
+ UNSPEC_FDOT
+ UNSPEC_SQCVT
+ UNSPEC_SQCVTN
+ UNSPEC_SQCVTU
+ UNSPEC_SQCVTUN
+ UNSPEC_UQCVT
+ UNSPEC_UQCVTN
+
;; All used in aarch64-sme.md
+ UNSPEC_SME_ADD
+ UNSPEC_SME_ADD_WRITE
UNSPEC_SME_ADDHA
UNSPEC_SME_ADDVA
+ UNSPEC_SME_BMOPA
+ UNSPEC_SME_BMOPS
+ UNSPEC_SME_FADD
+ UNSPEC_SME_FDOT
+ UNSPEC_SME_FVDOT
+ UNSPEC_SME_FMLA
+ UNSPEC_SME_FMLS
UNSPEC_SME_FMOPA
UNSPEC_SME_FMOPS
+ UNSPEC_SME_FSUB
UNSPEC_SME_LD1_HOR
UNSPEC_SME_LD1_VER
+ UNSPEC_SME_READ
UNSPEC_SME_READ_HOR
UNSPEC_SME_READ_VER
+ UNSPEC_SME_SDOT
+ UNSPEC_SME_SVDOT
+ UNSPEC_SME_SMLA
+ UNSPEC_SME_SMLS
UNSPEC_SME_SMOPA
UNSPEC_SME_SMOPS
UNSPEC_SME_ST1_HOR
UNSPEC_SME_ST1_VER
+ UNSPEC_SME_SUB
+ UNSPEC_SME_SUB_WRITE
+ UNSPEC_SME_SUDOT
+ UNSPEC_SME_SUVDOT
UNSPEC_SME_SUMOPA
UNSPEC_SME_SUMOPS
+ UNSPEC_SME_UDOT
+ UNSPEC_SME_UVDOT
+ UNSPEC_SME_UMLA
+ UNSPEC_SME_UMLS
UNSPEC_SME_UMOPA
UNSPEC_SME_UMOPS
+ UNSPEC_SME_USDOT
+ UNSPEC_SME_USVDOT
UNSPEC_SME_USMOPA
UNSPEC_SME_USMOPS
+ UNSPEC_SME_WRITE
UNSPEC_SME_WRITE_HOR
UNSPEC_SME_WRITE_VER
])
(VNx2DI "d")
(VNx2DF "d")
(VNx1TI "q")
+ (VNx32QI "b") (VNx64QI "b")
+ (VNx16HI "h") (VNx32HI "h")
+ (VNx16HF "h") (VNx32HF "h")
+ (VNx16BF "h") (VNx32BF "h")
+ (VNx8SI "s") (VNx16SI "s")
+ (VNx8SF "s") (VNx16SF "s")
+ (VNx4DI "d") (VNx8DI "d")
+ (VNx4DF "d") (VNx8DF "d")
(BF "h") (V4BF "h") (V8BF "h")
(HF "h")
(SF "s") (DF "d")
;; Narrowed modes of vector modes.
(define_mode_attr VNARROW [(VNx8HI "VNx16QI")
(VNx4SI "VNx8HI") (VNx4SF "VNx8HF")
- (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")])
+ (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")
+ (VNx8SI "VNx8HI") (VNx16SI "VNx16QI")
+ (VNx8DI "VNx8HI")])
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vntype [(V8HI "8b") (V4SI "4h")
(V16QI "V16HI") (V8HI "V8SI")
(V2SI "V2DI") (V4SI "V4DI")
(V2DI "V2TI") (DI "TI")
- (HI "SI") (SI "DI")])
+ (HI "SI") (SI "DI")
+ (VNx16QI "VNx16HI")
+ (VNx8HI "VNx8SI")
+ (VNx4SI "VNx4DI")
+ (VNx32QI "VNx32HI")
+ (VNx16HI "VNx16SI")
+ (VNx8SI "VNx8DI")])
+
+(define_mode_attr v2xwide [(V8QI "v8hi") (V4HI "v4si")
+ (V16QI "v16hi") (V8HI "v8si")
+ (V2SI "v2di") (V4SI "v4di")
+ (V2DI "v2ti") (DI "ti")
+ (HI "si") (SI "di")
+ (VNx16QI "vnx16hi")
+ (VNx8HI "vnx8si")
+ (VNx4SI "vnx4di")
+ (VNx32QI "vnx32hi")
+ (VNx16HI "vnx16si")
+ (VNx8SI "vnx8di")])
;; Predicate mode associated with VWIDE.
(define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])
;; SVE vector after narrowing.
(define_mode_attr Ventype [(VNx8HI "b")
(VNx4SI "h") (VNx4SF "h")
- (VNx2DI "s") (VNx2DF "s")])
+ (VNx2DI "s") (VNx2DF "s")
+ (VNx8SI "h") (VNx16SI "b")
+ (VNx8DI "h")])
;; SVE vector after widening.
(define_mode_attr Vewtype [(VNx16QI "h")
(VNx8BF "VNx8HI")
(VNx4SI "VNx4SI") (VNx4SF "VNx4SI")
(VNx2DI "VNx2DI") (VNx2DF "VNx2DI")
+ (VNx8SF "VNx8SI") (VNx16SF "VNx16SI")
])
;; Lower case mode with floating-point values replaced by like-sized integers.
(VNx8BF "vnx8hi")
(VNx4SI "vnx4si") (VNx4SF "vnx4si")
(VNx2DI "vnx2di") (VNx2DF "vnx2di")
+ (VNx8SF "vnx8si") (VNx16SF "vnx16si")
])
;; Floating-point equivalent of selected modes.
(VNx32HF "16") (VNx16SF "16") (VNx8DF "16")])
;; The type of a subvector in an SVE_STRUCT.
-(define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
+(define_mode_attr VSINGLE [(VNx16QI "VNx16QI")
+ (VNx8BF "VNx8BF")
+ (VNx8HF "VNx8HF")
+ (VNx8HI "VNx8HI")
+ (VNx32QI "VNx16QI")
(VNx16HI "VNx8HI") (VNx16HF "VNx8HF")
(VNx16BF "VNx8BF")
(VNx8SI "VNx4SI") (VNx8SF "VNx4SF")
(VNx8DI "VNx2DI") (VNx8DF "VNx2DF")])
;; ...and again in lower case.
-(define_mode_attr vsingle [(VNx32QI "vnx16qi")
+(define_mode_attr vsingle [(VNx8HI "vnx8hi")
+ (VNx32QI "vnx16qi")
(VNx16HI "vnx8hi") (VNx16HF "vnx8hf")
(VNx16BF "vnx8bf")
(VNx8SI "vnx4si") (VNx8SF "vnx4sf")
(V4HF "<Vetype>[%4]") (V8HF "<Vetype>[%4]")
])
+(define_mode_attr za32_offset_range [(VNx16QI "0_to_12_step_4")
+ (VNx8BF "0_to_14_step_2")
+ (VNx8HF "0_to_14_step_2")
+ (VNx8HI "0_to_14_step_2")
+ (VNx32QI "0_to_4_step_4")
+ (VNx16BF "0_to_6_step_2")
+ (VNx16HF "0_to_6_step_2")
+ (VNx16HI "0_to_6_step_2")
+ (VNx64QI "0_to_4_step_4")
+ (VNx32BF "0_to_6_step_2")
+ (VNx32HF "0_to_6_step_2")
+ (VNx32HI "0_to_6_step_2")])
+
+(define_mode_attr za64_offset_range [(VNx8HI "0_to_12_step_4")
+ (VNx16HI "0_to_4_step_4")
+ (VNx32HI "0_to_4_step_4")])
+
+(define_mode_attr za32_long [(VNx16QI "ll") (VNx32QI "ll") (VNx64QI "ll")
+ (VNx8HI "l") (VNx16HI "l") (VNx32HI "l")])
+
+(define_mode_attr za32_last_offset [(VNx16QI "3") (VNx32QI "3") (VNx64QI "3")
+ (VNx8HI "1") (VNx16HI "1") (VNx32HI "1")])
+
+(define_mode_attr vg_modifier [(VNx16QI "")
+ (VNx32QI ", vgx2")
+ (VNx64QI ", vgx4")
+ (VNx8BF "")
+ (VNx16BF ", vgx2")
+ (VNx32BF ", vgx4")
+ (VNx8HF "")
+ (VNx16HF ", vgx2")
+ (VNx32HF ", vgx4")
+ (VNx8HI "")
+ (VNx16HI ", vgx2")
+ (VNx32HI ", vgx4")])
+
+(define_mode_attr z_suffix [(VNx16QI ".b") (VNx32QI "") (VNx64QI "")
+ (VNx8BF ".h") (VNx16BF "") (VNx32BF "")
+ (VNx8HF ".h") (VNx16HF "") (VNx32HF "")
+ (VNx8HI ".h") (VNx16HI "") (VNx32HI "")])
+
;; The number of bytes controlled by a predicate
(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
(VNx4BI "4") (VNx2BI "8")])
(V8HI "vec") (V2SI "vec") (V4SI "vec")
(V2DI "vec") (DI "offset")])
-(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")])
+(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")
+ (VNx16BF "b") (VNx16HF "")
+ (VNx32BF "b") (VNx32HF "")])
+
+(define_mode_attr aligned_operand [(VNx16QI "register_operand")
+ (VNx8HI "register_operand")
+ (VNx8BF "register_operand")
+ (VNx8HF "register_operand")
+ (VNx32QI "aligned_register_operand")
+ (VNx16HI "aligned_register_operand")
+ (VNx16BF "aligned_register_operand")
+ (VNx16HF "aligned_register_operand")
+ (VNx64QI "aligned_register_operand")
+ (VNx32HI "aligned_register_operand")
+ (VNx32BF "aligned_register_operand")
+ (VNx32HF "aligned_register_operand")])
+
+(define_mode_attr aligned_fpr [(VNx16QI "w") (VNx8HI "w")
+ (VNx8BF "w") (VNx8HF "w")
+ (VNx32QI "Uw2") (VNx16HI "Uw2")
+ (VNx16BF "Uw2") (VNx16HF "Uw2")
+ (VNx64QI "Uw4") (VNx32HI "Uw4")
+ (VNx32BF "Uw4") (VNx32HF "Uw4")])
;; -------------------------------------------------------------------
;; Code Iterators
;; SVE integer binary operations that have an immediate form.
(define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin])
+(define_code_iterator SVE_INT_BINARY_MULTI [smax smin umax umin])
+
+(define_code_iterator SVE_INT_BINARY_SINGLE [plus smax smin umax umin])
+
;; SVE floating-point operations with an unpredicated all-register form.
(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
(UNSPEC_SRSHR "TARGET_SVE2")
(UNSPEC_URSHR "TARGET_SVE2")])
+(define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH
+ UNSPEC_SRSHL UNSPEC_URSHL])
+
(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
-(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT
- (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
+(define_int_iterator SVE_FP_BINARY_MULTI [UNSPEC_FMAX UNSPEC_FMAXNM
+ UNSPEC_FMIN UNSPEC_FMINNM])
+
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG
+ [UNSPEC_BFDOT
+ UNSPEC_BFMLALB
+ UNSPEC_BFMLALT
+ (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME")
+ (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME")
+ (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
-(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT])
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE
+ [UNSPEC_BFDOT
+ UNSPEC_BFMLALB
+ UNSPEC_BFMLALT
+ (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME")
+ (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME")])
(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
UNSPEC_IORV
(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR])
+(define_int_iterator SVE_WHILE_ORDER [UNSPEC_WHILEGE UNSPEC_WHILEGT
+ UNSPEC_WHILEHI UNSPEC_WHILEHS
+ UNSPEC_WHILELE UNSPEC_WHILELO
+ UNSPEC_WHILELS UNSPEC_WHILELT])
+
(define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE
UNSPEC_ASHIFTRT_WIDE
UNSPEC_LSHIFTRT_WIDE])
UNSPEC_UQRSHRNT
UNSPEC_UQSHRNT])
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN [UNSPEC_SQRSHR
+ UNSPEC_SQRSHRN
+ UNSPEC_SQRSHRU
+ UNSPEC_SQRSHRUN
+ UNSPEC_UQRSHR
+ UNSPEC_UQRSHRN])
+
(define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI])
(define_int_iterator SVE2_INT_CADD [UNSPEC_CADD90
(define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR])
+(define_int_iterator SVE_QCVTxN [UNSPEC_SQCVT UNSPEC_SQCVTN
+ UNSPEC_SQCVTU UNSPEC_SQCVTUN
+ UNSPEC_UQCVT UNSPEC_UQCVTN])
+
+(define_int_iterator SVE2_SFx24_UNARY [UNSPEC_FRINTA UNSPEC_FRINTM
+ UNSPEC_FRINTN UNSPEC_FRINTP])
+
+(define_int_iterator SVE2_x24_PERMUTE [UNSPEC_ZIP UNSPEC_UZP])
+(define_int_iterator SVE2_x24_PERMUTEQ [UNSPEC_ZIPQ UNSPEC_UZPQ])
+
(define_int_iterator FCADD [UNSPEC_FCADD90
UNSPEC_FCADD270])
(define_int_iterator FCMUL_OP [UNSPEC_FCMUL
UNSPEC_FCMUL_CONJ])
+(define_int_iterator UNSPEC_REVD_ONLY [UNSPEC_REVD])
+
(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER])
(define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER])
(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER])
UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS
UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS])
+(define_int_iterator SME2_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS
+ UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS])
+
(define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS])
+(define_int_iterator SME2_BMOP [UNSPEC_SME_BMOPA UNSPEC_SME_BMOPS])
+
+(define_int_iterator SME_BINARY_SLICE_SDI [UNSPEC_SME_ADD UNSPEC_SME_SUB])
+
+(define_int_iterator SME_BINARY_SLICE_SDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
+
+(define_int_iterator SME_BINARY_WRITE_SLICE_SDI [UNSPEC_SME_ADD_WRITE
+ UNSPEC_SME_SUB_WRITE])
+
+(define_int_iterator SME_INT_DOTPROD [UNSPEC_SME_SDOT UNSPEC_SME_UDOT
+ UNSPEC_SME_USDOT])
+
+(define_int_iterator SME_INT_DOTPROD_LANE [UNSPEC_SME_SDOT UNSPEC_SME_SVDOT
+ UNSPEC_SME_UDOT UNSPEC_SME_UVDOT
+ UNSPEC_SME_SUDOT UNSPEC_SME_SUVDOT
+ UNSPEC_SME_USDOT UNSPEC_SME_USVDOT])
+
+(define_int_iterator SME_FP_DOTPROD [UNSPEC_SME_FDOT])
+
+(define_int_iterator SME_FP_DOTPROD_LANE [UNSPEC_SME_FDOT UNSPEC_SME_FVDOT])
+
+(define_int_iterator SME_INT_TERNARY_SLICE [UNSPEC_SME_SMLA UNSPEC_SME_SMLS
+ UNSPEC_SME_UMLA UNSPEC_SME_UMLS])
+
+(define_int_iterator SME_FP_TERNARY_SLICE [UNSPEC_SME_FMLA UNSPEC_SME_FMLS])
+
;; Iterators for atomic operations.
(define_int_iterator ATOMIC_LDOP
(define_int_iterator SUBDI_BITS [8 16 32])
+(define_int_iterator BHSD_BITS [8 16 32 64])
+
+(define_int_iterator LUTI_BITS [2 4])
+
;; -------------------------------------------------------------------
;; Int Iterators Attributes.
;; -------------------------------------------------------------------
(UNSPEC_RSQRTS "frsqrts")
(UNSPEC_RBIT "rbit")
(UNSPEC_REVB "revb")
+ (UNSPEC_REVD "revd")
(UNSPEC_REVH "revh")
(UNSPEC_REVW "revw")
(UNSPEC_UMAXV "umax")
(UNSPEC_PMULLT "pmullt")
(UNSPEC_PMULLT_PAIR "pmullt_pair")
(UNSPEC_SMATMUL "smatmul")
+ (UNSPEC_UZP "uzp")
+ (UNSPEC_UZPQ "uzpq")
+ (UNSPEC_ZIP "zip")
+ (UNSPEC_ZIPQ "zipq")
+ (UNSPEC_SME_ADD "add")
+ (UNSPEC_SME_ADD_WRITE "add_write")
(UNSPEC_SME_ADDHA "addha")
(UNSPEC_SME_ADDVA "addva")
+ (UNSPEC_SME_BMOPA "bmopa")
+ (UNSPEC_SME_BMOPS "bmops")
+ (UNSPEC_SME_FADD "fadd")
+ (UNSPEC_SME_FDOT "fdot")
+ (UNSPEC_SME_FVDOT "fvdot")
+ (UNSPEC_SME_FMLA "fmla")
+ (UNSPEC_SME_FMLS "fmls")
(UNSPEC_SME_FMOPA "fmopa")
(UNSPEC_SME_FMOPS "fmops")
+ (UNSPEC_SME_FSUB "fsub")
(UNSPEC_SME_LD1_HOR "ld1_hor")
(UNSPEC_SME_LD1_VER "ld1_ver")
(UNSPEC_SME_READ_HOR "read_hor")
(UNSPEC_SME_READ_VER "read_ver")
+ (UNSPEC_SME_SDOT "sdot")
+ (UNSPEC_SME_SVDOT "svdot")
+ (UNSPEC_SME_SMLA "smla")
+ (UNSPEC_SME_SMLS "smls")
(UNSPEC_SME_SMOPA "smopa")
(UNSPEC_SME_SMOPS "smops")
(UNSPEC_SME_ST1_HOR "st1_hor")
(UNSPEC_SME_ST1_VER "st1_ver")
+ (UNSPEC_SME_SUB "sub")
+ (UNSPEC_SME_SUB_WRITE "sub_write")
+ (UNSPEC_SME_SUDOT "sudot")
+ (UNSPEC_SME_SUVDOT "suvdot")
(UNSPEC_SME_SUMOPA "sumopa")
(UNSPEC_SME_SUMOPS "sumops")
+ (UNSPEC_SME_UDOT "udot")
+ (UNSPEC_SME_UVDOT "uvdot")
+ (UNSPEC_SME_UMLA "umla")
+ (UNSPEC_SME_UMLS "umls")
(UNSPEC_SME_UMOPA "umopa")
(UNSPEC_SME_UMOPS "umops")
+ (UNSPEC_SME_USDOT "usdot")
+ (UNSPEC_SME_USVDOT "usvdot")
(UNSPEC_SME_USMOPA "usmopa")
(UNSPEC_SME_USMOPS "usmops")
(UNSPEC_SME_WRITE_HOR "write_hor")
(UNSPEC_SME_WRITE_VER "write_ver")
(UNSPEC_SQCADD90 "sqcadd90")
(UNSPEC_SQCADD270 "sqcadd270")
+ (UNSPEC_SQCVT "sqcvt")
+ (UNSPEC_SQCVTN "sqcvtn")
+ (UNSPEC_SQCVTU "sqcvtu")
+ (UNSPEC_SQCVTUN "sqcvtun")
(UNSPEC_SQRDCMLAH "sqrdcmlah")
(UNSPEC_SQRDCMLAH90 "sqrdcmlah90")
(UNSPEC_SQRDCMLAH180 "sqrdcmlah180")
(UNSPEC_TRN1Q "trn1q")
(UNSPEC_TRN2Q "trn2q")
(UNSPEC_UMATMUL "umatmul")
+ (UNSPEC_UQCVT "uqcvt")
+ (UNSPEC_UQCVTN "uqcvtn")
(UNSPEC_USMATMUL "usmatmul")
(UNSPEC_UZP1Q "uzp1q")
(UNSPEC_UZP2Q "uzp2q")
(UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2")
(UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2")
(UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2")
- (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")])
+ (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")
+ (UNSPEC_UZP "uzp") (UNSPEC_UZPQ "uzp")
+ (UNSPEC_ZIP "zip") (UNSPEC_ZIPQ "zip")])
; op code for REV instructions (size within which elements are reversed).
(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
(UNSPEC_SQRDMLSH "sqrdmlsh")
(UNSPEC_SQRDMULH "sqrdmulh")
(UNSPEC_SQRSHL "sqrshl")
+ (UNSPEC_SQRSHR "sqrshr")
+ (UNSPEC_SQRSHRN "sqrshrn")
(UNSPEC_SQRSHRNB "sqrshrnb")
(UNSPEC_SQRSHRNT "sqrshrnt")
+ (UNSPEC_SQRSHRU "sqrshru")
+ (UNSPEC_SQRSHRUN "sqrshrun")
(UNSPEC_SQRSHRUNB "sqrshrunb")
(UNSPEC_SQRSHRUNT "sqrshrunt")
(UNSPEC_SQSHL "sqshl")
(UNSPEC_UMULLB "umullb")
(UNSPEC_UMULLT "umullt")
(UNSPEC_UQRSHL "uqrshl")
+ (UNSPEC_UQRSHR "uqrshr")
+ (UNSPEC_UQRSHRN "uqrshrn")
(UNSPEC_UQRSHRNB "uqrshrnb")
(UNSPEC_UQRSHRNT "uqrshrnt")
(UNSPEC_UQSHL "uqshl")
(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot")
(UNSPEC_BFMLALB "bfmlalb")
(UNSPEC_BFMLALT "bfmlalt")
+ (UNSPEC_BFMLSLB "bfmlslb")
+ (UNSPEC_BFMLSLT "bfmlslt")
(UNSPEC_BFMMLA "bfmmla")
(UNSPEC_FRECPE "frecpe")
(UNSPEC_FRECPS "frecps")
(UNSPEC_COND_FMULX "fmulx")
(UNSPEC_COND_FSUB "fsubr")])
+(define_int_attr sme_int_op [(UNSPEC_SME_ADD_WRITE "add")
+ (UNSPEC_SME_SUB_WRITE "sub")])
+
(define_int_attr rot [(UNSPEC_CADD90 "90")
(UNSPEC_CADD270 "270")
(UNSPEC_CDOT "0")
(UNSPEC_SME_WRITE_HOR "h")
(UNSPEC_SME_WRITE_VER "v")])
+(define_int_attr has_16bit_form [(UNSPEC_SME_SDOT "true")
+ (UNSPEC_SME_SVDOT "true")
+ (UNSPEC_SME_UDOT "true")
+ (UNSPEC_SME_UVDOT "true")
+ (UNSPEC_SME_SUDOT "false")
+ (UNSPEC_SME_SUVDOT "false")
+ (UNSPEC_SME_USDOT "false")
+ (UNSPEC_SME_USVDOT "false")])
+
;; Iterators and attributes for fpcr fpsr getter setters
(define_int_iterator GET_FPSCR
(UNSPECV_GET_FPCR "fpcr")
(UNSPECV_SET_FPCR "fpcr")])
-(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s")])
+(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")])
(and (match_code "const_int")
(match_test "op == CONST0_RTX (mode)")))
+(define_predicate "const_0_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+(define_predicate "const_0_to_4_step_4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 4)")
+ (match_test "(INTVAL (op) & 3) == 0")))
+
+(define_predicate "const_0_to_6_step_2_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 6)")
+ (match_test "(INTVAL (op) & 1) == 0")))
+
+(define_predicate "const_0_to_12_step_4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 12)")
+ (match_test "(INTVAL (op) & 3) == 0")))
+
+(define_predicate "const_0_to_14_step_2_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 14)")
+ (match_test "(INTVAL (op) & 1) == 0")))
+
(define_predicate "const_1_to_3_operand"
(match_code "const_int,const_vector")
{
;; Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
;; Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
(define_predicate "aarch64_simd_shift_imm_qi"
- (and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+ (match_operand 0 "const_0_to_7_operand"))
(define_predicate "aarch64_simd_shift_imm_hi"
(and (match_code "const_int")
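The stepped predicates added above accept only offsets that are both in range and suitably aligned. As a quick orientation aid (not part of the patch), the equivalent check written in plain C would look like the sketch below; the function name is purely illustrative.

#include <stdbool.h>

/* Mirrors const_0_to_14_step_2_operand: the even values 0, 2, ..., 14.
   The _step_4 predicates have the same shape with a mask of 3 and an
   upper bound of 4 or 12.  */
static bool
const_0_to_14_step_2_p (long long val)
{
  return val >= 0 && val <= 14 && (val & 1) == 0;
}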
--- /dev/null
+# Assembly-based regression-test driver for the SME2 ACLE.
+# Copyright (C) 2009-2023 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } {
+ return
+}
+
+# Load support procs.
+load_lib g++-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Force SME2 if we're not testing it already.
+if { [check_effective_target_aarch64_sme2] } {
+ set sme2_flags ""
+} else {
+ set sme2_flags "-march=armv9-a+sme2"
+}
+
+# Turn off any codegen tweaks by default that may affect expected assembly.
+# Tests relying on those should turn them on explicitly.
+set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none"
+
+global gcc_runtest_parallelize_limit_minor
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set old_limit_minor $gcc_runtest_parallelize_limit_minor
+ set gcc_runtest_parallelize_limit_minor 1
+}
+
+torture-init
+set-torture-options {
+ "-std=c++11 -O0 -g"
+ "-std=c++14 -O1 -g"
+ "-std=c++17 -Og -g"
+ "-std=c++23 -Os -g"
+ "-std=gnu++11 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
+ "-std=gnu++23 -Ofast -g"
+} {
+ "-DTEST_FULL"
+ "-DTEST_OVERLOADS"
+}
+
+# Main loop.
+set gcc_subdir [string replace $subdir 0 2 gcc]
+set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c]
+set save-dg-do-what-default ${dg-do-what-default}
+if { [check_effective_target_aarch64_asm_sme2_ok] } {
+ set dg-do-what-default assemble
+} else {
+ set dg-do-what-default compile
+}
+gcc-dg-runtest [lsort $files] "" "$sme2_flags -fno-ipa-icf"
+set dg-do-what-default ${save-dg-do-what-default}
+
+torture-finish
+
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set gcc_runtest_parallelize_limit_minor $old_limit_minor
+}
+
+# All done.
+dg-finish
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_s16_tied1:
+** sclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s16_tied1, svint16_t,
+ z0 = svclamp_s16 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_s16_tied2:
+** sclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s16_tied2, svint16_t,
+ z0 = svclamp_s16 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_s16_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** sclamp z0\.h, z2\.h, \1\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s16_tied3, svint16_t,
+ z0 = svclamp_s16 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_s16_untied:
+** movprfx z0, z1
+** sclamp z0\.h, z2\.h, z3\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s16_untied, svint16_t,
+ z0 = svclamp_s16 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_s32_tied1:
+** sclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s32_tied1, svint32_t,
+ z0 = svclamp_s32 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_s32_tied2:
+** sclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s32_tied2, svint32_t,
+ z0 = svclamp_s32 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** sclamp z0\.s, z2\.s, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s32_tied3, svint32_t,
+ z0 = svclamp_s32 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_s32_untied:
+** movprfx z0, z1
+** sclamp z0\.s, z2\.s, z3\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s32_untied, svint32_t,
+ z0 = svclamp_s32 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_s64_tied1:
+** sclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s64_tied1, svint64_t,
+ z0 = svclamp_s64 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_s64_tied2:
+** sclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s64_tied2, svint64_t,
+ z0 = svclamp_s64 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_s64_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** sclamp z0\.d, z2\.d, \1\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s64_tied3, svint64_t,
+ z0 = svclamp_s64 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_s64_untied:
+** movprfx z0, z1
+** sclamp z0\.d, z2\.d, z3\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s64_untied, svint64_t,
+ z0 = svclamp_s64 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_s8_tied1:
+** sclamp z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s8_tied1, svint8_t,
+ z0 = svclamp_s8 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_s8_tied2:
+** sclamp z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s8_tied2, svint8_t,
+ z0 = svclamp_s8 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_s8_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** sclamp z0\.b, z2\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s8_tied3, svint8_t,
+ z0 = svclamp_s8 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_s8_untied:
+** movprfx z0, z1
+** sclamp z0\.b, z2\.b, z3\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_s8_untied, svint8_t,
+ z0 = svclamp_s8 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_u16_tied1:
+** uclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u16_tied1, svuint16_t,
+ z0 = svclamp_u16 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_u16_tied2:
+** uclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u16_tied2, svuint16_t,
+ z0 = svclamp_u16 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_u16_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** uclamp z0\.h, z2\.h, \1\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u16_tied3, svuint16_t,
+ z0 = svclamp_u16 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_u16_untied:
+** movprfx z0, z1
+** uclamp z0\.h, z2\.h, z3\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u16_untied, svuint16_t,
+ z0 = svclamp_u16 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_u32_tied1:
+** uclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u32_tied1, svuint32_t,
+ z0 = svclamp_u32 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_u32_tied2:
+** uclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u32_tied2, svuint32_t,
+ z0 = svclamp_u32 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** uclamp z0\.s, z2\.s, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u32_tied3, svuint32_t,
+ z0 = svclamp_u32 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_u32_untied:
+** movprfx z0, z1
+** uclamp z0\.s, z2\.s, z3\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u32_untied, svuint32_t,
+ z0 = svclamp_u32 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_u64_tied1:
+** uclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u64_tied1, svuint64_t,
+ z0 = svclamp_u64 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_u64_tied2:
+** uclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u64_tied2, svuint64_t,
+ z0 = svclamp_u64 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_u64_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** uclamp z0\.d, z2\.d, \1\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u64_tied3, svuint64_t,
+ z0 = svclamp_u64 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_u64_untied:
+** movprfx z0, z1
+** uclamp z0\.d, z2\.d, z3\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u64_untied, svuint64_t,
+ z0 = svclamp_u64 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** clamp_u8_tied1:
+** uclamp z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u8_tied1, svuint8_t,
+ z0 = svclamp_u8 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_u8_tied2:
+** uclamp z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u8_tied2, svuint8_t,
+ z0 = svclamp_u8 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_u8_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** uclamp z0\.b, z2\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u8_tied3, svuint8_t,
+ z0 = svclamp_u8 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_u8_untied:
+** movprfx z0, z1
+** uclamp z0\.b, z2\.b, z3\.b
+** ret
+*/
+TEST_UNIFORM_Z (clamp_u8_untied, svuint8_t,
+ z0 = svclamp_u8 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_bf16_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_m_tied12, svbfloat16_t,
+ z0 = svrevd_bf16_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_bf16_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_m_tied1, svbfloat16_t,
+ z0 = svrevd_bf16_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_bf16_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_m_tied2, svbfloat16_t,
+ z0 = svrevd_bf16_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_bf16_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_m_untied, svbfloat16_t,
+ z0 = svrevd_bf16_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_bf16_z_tied1, svbfloat16_t,
+ z0 = svrevd_bf16_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_bf16_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_z_untied, svbfloat16_t,
+ z0 = svrevd_bf16_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_bf16_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_x_tied1, svbfloat16_t,
+ z0 = svrevd_bf16_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_bf16_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_bf16_x_untied, svbfloat16_t,
+ z0 = svrevd_bf16_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
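The revd tests above, and the per-type copies that follow, all exercise the SME REVD instruction, which reverses the two 64-bit doublewords within each 128-bit quadword of a vector. A hedged scalar model of that effect, for orientation only (the function name and buffer-based interface are illustrative, not taken from the patch):

#include <stddef.h>
#include <stdint.h>

/* Scalar sketch of an unpredicated REVD: swap the two 64-bit halves of
   each 128-bit quadword.  */
static void
revd_model (uint64_t *lanes, size_t n_quadwords)
{
  for (size_t q = 0; q < n_quadwords; ++q)
    {
      uint64_t tmp = lanes[2 * q];
      lanes[2 * q] = lanes[2 * q + 1];
      lanes[2 * q + 1] = tmp;
    }
}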
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_f16_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_m_tied12, svfloat16_t,
+ z0 = svrevd_f16_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_f16_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_m_tied1, svfloat16_t,
+ z0 = svrevd_f16_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_f16_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_m_tied2, svfloat16_t,
+ z0 = svrevd_f16_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_f16_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_m_untied, svfloat16_t,
+ z0 = svrevd_f16_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_f16_z_tied1, svfloat16_t,
+ z0 = svrevd_f16_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_f16_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_z_untied, svfloat16_t,
+ z0 = svrevd_f16_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_f16_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_x_tied1, svfloat16_t,
+ z0 = svrevd_f16_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_f16_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f16_x_untied, svfloat16_t,
+ z0 = svrevd_f16_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_f32_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_m_tied12, svfloat32_t,
+ z0 = svrevd_f32_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_f32_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_m_tied1, svfloat32_t,
+ z0 = svrevd_f32_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_f32_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_m_tied2, svfloat32_t,
+ z0 = svrevd_f32_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_f32_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_m_untied, svfloat32_t,
+ z0 = svrevd_f32_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_f32_z_tied1, svfloat32_t,
+ z0 = svrevd_f32_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_f32_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_z_untied, svfloat32_t,
+ z0 = svrevd_f32_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_f32_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_x_tied1, svfloat32_t,
+ z0 = svrevd_f32_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_f32_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f32_x_untied, svfloat32_t,
+ z0 = svrevd_f32_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_f64_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_m_tied12, svfloat64_t,
+ z0 = svrevd_f64_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_f64_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_m_tied1, svfloat64_t,
+ z0 = svrevd_f64_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_f64_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_m_tied2, svfloat64_t,
+ z0 = svrevd_f64_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_f64_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_m_untied, svfloat64_t,
+ z0 = svrevd_f64_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_f64_z_tied1, svfloat64_t,
+ z0 = svrevd_f64_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_f64_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_z_untied, svfloat64_t,
+ z0 = svrevd_f64_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_f64_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_x_tied1, svfloat64_t,
+ z0 = svrevd_f64_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_f64_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_f64_x_untied, svfloat64_t,
+ z0 = svrevd_f64_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_s16_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_m_tied12, svint16_t,
+ z0 = svrevd_s16_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_s16_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_m_tied1, svint16_t,
+ z0 = svrevd_s16_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_s16_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_m_tied2, svint16_t,
+ z0 = svrevd_s16_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_s16_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_m_untied, svint16_t,
+ z0 = svrevd_s16_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_s16_z_tied1, svint16_t,
+ z0 = svrevd_s16_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_s16_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_z_untied, svint16_t,
+ z0 = svrevd_s16_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_s16_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_x_tied1, svint16_t,
+ z0 = svrevd_s16_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_s16_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s16_x_untied, svint16_t,
+ z0 = svrevd_s16_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_s32_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_m_tied12, svint32_t,
+ z0 = svrevd_s32_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_s32_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_m_tied1, svint32_t,
+ z0 = svrevd_s32_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_s32_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_m_tied2, svint32_t,
+ z0 = svrevd_s32_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_s32_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_m_untied, svint32_t,
+ z0 = svrevd_s32_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_s32_z_tied1, svint32_t,
+ z0 = svrevd_s32_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_s32_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_z_untied, svint32_t,
+ z0 = svrevd_s32_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_s32_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_x_tied1, svint32_t,
+ z0 = svrevd_s32_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_s32_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s32_x_untied, svint32_t,
+ z0 = svrevd_s32_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_s64_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_m_tied12, svint64_t,
+ z0 = svrevd_s64_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_s64_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_m_tied1, svint64_t,
+ z0 = svrevd_s64_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_s64_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_m_tied2, svint64_t,
+ z0 = svrevd_s64_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_s64_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_m_untied, svint64_t,
+ z0 = svrevd_s64_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_s64_z_tied1, svint64_t,
+ z0 = svrevd_s64_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_s64_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_z_untied, svint64_t,
+ z0 = svrevd_s64_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_s64_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_x_tied1, svint64_t,
+ z0 = svrevd_s64_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_s64_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s64_x_untied, svint64_t,
+ z0 = svrevd_s64_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_s8_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_m_tied12, svint8_t,
+ z0 = svrevd_s8_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_s8_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_m_tied1, svint8_t,
+ z0 = svrevd_s8_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_s8_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_m_tied2, svint8_t,
+ z0 = svrevd_s8_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_s8_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_m_untied, svint8_t,
+ z0 = svrevd_s8_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_s8_z_tied1, svint8_t,
+ z0 = svrevd_s8_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_s8_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_z_untied, svint8_t,
+ z0 = svrevd_s8_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_s8_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_x_tied1, svint8_t,
+ z0 = svrevd_s8_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_s8_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_s8_x_untied, svint8_t,
+ z0 = svrevd_s8_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_u16_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_m_tied12, svuint16_t,
+ z0 = svrevd_u16_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_u16_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_m_tied1, svuint16_t,
+ z0 = svrevd_u16_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_u16_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_m_tied2, svuint16_t,
+ z0 = svrevd_u16_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_u16_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_m_untied, svuint16_t,
+ z0 = svrevd_u16_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_u16_z_tied1, svuint16_t,
+ z0 = svrevd_u16_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_u16_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_z_untied, svuint16_t,
+ z0 = svrevd_u16_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_u16_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_x_tied1, svuint16_t,
+ z0 = svrevd_u16_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_u16_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u16_x_untied, svuint16_t,
+ z0 = svrevd_u16_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_u32_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_m_tied12, svuint32_t,
+ z0 = svrevd_u32_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_u32_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_m_tied1, svuint32_t,
+ z0 = svrevd_u32_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_u32_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_m_tied2, svuint32_t,
+ z0 = svrevd_u32_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_u32_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_m_untied, svuint32_t,
+ z0 = svrevd_u32_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_u32_z_tied1, svuint32_t,
+ z0 = svrevd_u32_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_u32_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_z_untied, svuint32_t,
+ z0 = svrevd_u32_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_u32_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_x_tied1, svuint32_t,
+ z0 = svrevd_u32_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_u32_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u32_x_untied, svuint32_t,
+ z0 = svrevd_u32_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_u64_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_m_tied12, svuint64_t,
+ z0 = svrevd_u64_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_u64_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_m_tied1, svuint64_t,
+ z0 = svrevd_u64_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_u64_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_m_tied2, svuint64_t,
+ z0 = svrevd_u64_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_u64_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_m_untied, svuint64_t,
+ z0 = svrevd_u64_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_u64_z_tied1, svuint64_t,
+ z0 = svrevd_u64_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_u64_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_z_untied, svuint64_t,
+ z0 = svrevd_u64_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_u64_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_x_tied1, svuint64_t,
+ z0 = svrevd_u64_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_u64_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u64_x_untied, svuint64_t,
+ z0 = svrevd_u64_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** revd_u8_m_tied12:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_m_tied12, svuint8_t,
+ z0 = svrevd_u8_m (z0, p0, z0),
+ z0 = svrevd_m (z0, p0, z0))
+
+/*
+** revd_u8_m_tied1:
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_m_tied1, svuint8_t,
+ z0 = svrevd_u8_m (z0, p0, z1),
+ z0 = svrevd_m (z0, p0, z1))
+
+/*
+** revd_u8_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** revd z0\.q, p0/m, \1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_m_tied2, svuint8_t,
+ z0 = svrevd_u8_m (z1, p0, z0),
+ z0 = svrevd_m (z1, p0, z0))
+
+/*
+** revd_u8_m_untied:
+** movprfx z0, z2
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_m_untied, svuint8_t,
+ z0 = svrevd_u8_m (z2, p0, z1),
+ z0 = svrevd_m (z2, p0, z1))
+
+/* Awkward register allocation. Don't require specific output. */
+TEST_UNIFORM_Z (revd_u8_z_tied1, svuint8_t,
+ z0 = svrevd_u8_z (p0, z0),
+ z0 = svrevd_z (p0, z0))
+
+/*
+** revd_u8_z_untied:
+** mov z0\.[bhsd], #0
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_z_untied, svuint8_t,
+ z0 = svrevd_u8_z (p0, z1),
+ z0 = svrevd_z (p0, z1))
+
+/*
+** revd_u8_x_tied1:
+** revd z0\.q, p0/m, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_x_tied1, svuint8_t,
+ z0 = svrevd_u8_x (p0, z0),
+ z0 = svrevd_x (p0, z0))
+
+/*
+** revd_u8_x_untied:
+** movprfx z0, z1
+** revd z0\.q, p0/m, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (revd_u8_x_untied, svuint8_t,
+ z0 = svrevd_u8_x (p0, z1),
+ z0 = svrevd_x (p0, z1))
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#define TEST(TYPE) \
+ TYPE \
+ tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c); \
+ } \
+ \
+ TYPE \
+ tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c); \
+ }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
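The test above checks that, at -O, a max-then-min chain on streaming SVE vectors is combined into a single SCLAMP/UCLAMP instruction. A minimal self-contained sketch of the same idiom outside the test harness (the function name and the choice of svint32_t are illustrative only):

#include <arm_sme.h>

/* Clamp val to [lo, hi]; with SME2 enabled and -O this is expected to
   lower to a single "sclamp" instruction rather than separate
   smax/smin operations.  */
svint32_t
clamp_to_range (svint32_t val, svint32_t lo, svint32_t hi) __arm_streaming
{
  return svmin_x (svptrue_b8 (), svmax_x (svptrue_b8 (), val, lo), hi);
}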
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#define TEST(TYPE) \
+ TYPE \
+ untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming \
+ { \
+ return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d); \
+ }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#define TEST(TYPE) \
+ TYPE \
+ tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c); \
+ } \
+ \
+ TYPE \
+ tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
--- /dev/null
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#define TEST(TYPE) \
+ TYPE \
+ untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, c), d); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 3 } } */
--- /dev/null
+# Assembly-based regression-test driver for the SME2 ACLE.
+# Copyright (C) 2009-2023 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see

+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Force SME2 if we're not testing it already.
+if { [check_effective_target_aarch64_sme2] } {
+ set sme2_flags ""
+} else {
+ set sme2_flags "-march=armv9-a+sme2"
+}
+
+# Turn off any codegen tweaks by default that may affect expected assembly.
+# Tests relying on those should turn them on explicitly.
+set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none"
+
+global gcc_runtest_parallelize_limit_minor
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set old_limit_minor $gcc_runtest_parallelize_limit_minor
+ set gcc_runtest_parallelize_limit_minor 1
+}
+
+torture-init
+set-torture-options {
+ "-std=c90 -O0 -g"
+ "-std=c99 -Og -g"
+ "-std=c11 -Os -g"
+ "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
+ "-std=gnu90 -O3 -g"
+ "-std=gnu23 -Ofast -g"
+} {
+ "-DTEST_FULL"
+ "-DTEST_OVERLOADS"
+}
+
+# Main loop.
+set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c]
+set save-dg-do-what-default ${dg-do-what-default}
+if { [check_effective_target_aarch64_asm_sme2_ok] } {
+ set dg-do-what-default assemble
+} else {
+ set dg-do-what-default compile
+}
+gcc-dg-runtest [lsort $files] "" "$sme2_flags -fno-ipa-icf"
+set dg-do-what-default ${save-dg-do-what-default}
+
+torture-finish
+
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set gcc_runtest_parallelize_limit_minor $old_limit_minor
+}
+
+# All done.
+dg-finish
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint16x2_t, svint16_t, z24,
+ svadd_single_s16_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** add {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint16x2_t, svint16_t, z24,
+ svadd_single_s16_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint16x2_t, svint16_t, z24,
+ svadd_single_s16_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint16x2_t, svint16_t, z1,
+ svadd_single_s16_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint16x2_t, svint16_t, z1,
+ svadd_single_s16_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint16x2_t, svint16_t, z18,
+ svadd_single_s16_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint16x2_t, svint16_t,
+ z0_res = svadd_single_s16_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint16x2_t, svint16_t,
+ z0 = svadd_single_s16_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint16x2_t, svint16_t, z24,
+ svadd_single_s16_x2 (z24, z16),
+ svadd (z24, z16))
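The _single intrinsics tested above take a vector tuple plus one ordinary vector and add that vector to every element vector of the tuple, so the x2 forms map to a single multi-vector ADD with a one-register second operand. A minimal usage sketch grounded in the calls above (the function name is illustrative):

#include <arm_sme.h>

/* Add the same vector to both halves of a two-vector tuple; equivalent
   to the explicit svadd_single_s16_x2 (acc, op) used in the tests.  */
svint16x2_t
add_broadcast_x2 (svint16x2_t acc, svint16_t op) __arm_streaming
{
  return svadd (acc, op);
}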
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint16x4_t, svint16_t, z24,
+ svadd_single_s16_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** add {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint16x4_t, svint16_t, z24,
+ svadd_single_s16_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint16x4_t, svint16_t, z24,
+ svadd_single_s16_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint16x4_t, svint16_t, z1,
+ svadd_single_s16_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint16x4_t, svint16_t, z1,
+ svadd_single_s16_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint16x4_t, svint16_t, z18,
+ svadd_single_s16_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint16x4_t, svint16_t,
+ z0_res = svadd_single_s16_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint16x4_t, svint16_t,
+ z0 = svadd_single_s16_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint16x4_t, svint16_t, z24,
+ svadd_single_s16_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint32x2_t, svint32_t, z24,
+ svadd_single_s32_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** add {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint32x2_t, svint32_t, z24,
+ svadd_single_s32_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint32x2_t, svint32_t, z24,
+ svadd_single_s32_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint32x2_t, svint32_t, z1,
+ svadd_single_s32_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint32x2_t, svint32_t, z1,
+ svadd_single_s32_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint32x2_t, svint32_t, z18,
+ svadd_single_s32_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint32x2_t, svint32_t,
+ z0_res = svadd_single_s32_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint32x2_t, svint32_t,
+ z0 = svadd_single_s32_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint32x2_t, svint32_t, z24,
+ svadd_single_s32_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint32x4_t, svint32_t, z24,
+ svadd_single_s32_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** add {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint32x4_t, svint32_t, z24,
+ svadd_single_s32_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint32x4_t, svint32_t, z24,
+ svadd_single_s32_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint32x4_t, svint32_t, z1,
+ svadd_single_s32_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint32x4_t, svint32_t, z1,
+ svadd_single_s32_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint32x4_t, svint32_t, z18,
+ svadd_single_s32_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint32x4_t, svint32_t,
+ z0_res = svadd_single_s32_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint32x4_t, svint32_t,
+ z0 = svadd_single_s32_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint32x4_t, svint32_t, z24,
+ svadd_single_s32_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint64x2_t, svint64_t, z24,
+ svadd_single_s64_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** add {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint64x2_t, svint64_t, z24,
+ svadd_single_s64_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint64x2_t, svint64_t, z24,
+ svadd_single_s64_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint64x2_t, svint64_t, z1,
+ svadd_single_s64_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint64x2_t, svint64_t, z1,
+ svadd_single_s64_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint64x2_t, svint64_t, z18,
+ svadd_single_s64_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint64x2_t, svint64_t,
+ z0_res = svadd_single_s64_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint64x2_t, svint64_t,
+ z0 = svadd_single_s64_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint64x2_t, svint64_t, z24,
+ svadd_single_s64_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint64x4_t, svint64_t, z24,
+ svadd_single_s64_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** add {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint64x4_t, svint64_t, z24,
+ svadd_single_s64_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint64x4_t, svint64_t, z24,
+ svadd_single_s64_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint64x4_t, svint64_t, z1,
+ svadd_single_s64_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint64x4_t, svint64_t, z1,
+ svadd_single_s64_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint64x4_t, svint64_t, z18,
+ svadd_single_s64_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint64x4_t, svint64_t,
+ z0_res = svadd_single_s64_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint64x4_t, svint64_t,
+ z0 = svadd_single_s64_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint64x4_t, svint64_t, z24,
+ svadd_single_s64_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint8x2_t, svint8_t, z24,
+ svadd_single_s8_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** add {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint8x2_t, svint8_t, z24,
+ svadd_single_s8_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint8x2_t, svint8_t, z24,
+ svadd_single_s8_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint8x2_t, svint8_t, z1,
+ svadd_single_s8_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint8x2_t, svint8_t, z1,
+ svadd_single_s8_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint8x2_t, svint8_t, z18,
+ svadd_single_s8_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint8x2_t, svint8_t,
+ z0_res = svadd_single_s8_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint8x2_t, svint8_t,
+ z0 = svadd_single_s8_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint8x2_t, svint8_t, z24,
+ svadd_single_s8_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svint8x4_t, svint8_t, z24,
+ svadd_single_s8_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** add {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svint8x4_t, svint8_t, z24,
+ svadd_single_s8_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svint8x4_t, svint8_t, z24,
+ svadd_single_s8_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svint8x4_t, svint8_t, z1,
+ svadd_single_s8_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svint8x4_t, svint8_t, z1,
+ svadd_single_s8_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svint8x4_t, svint8_t, z18,
+ svadd_single_s8_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svint8x4_t, svint8_t,
+ z0_res = svadd_single_s8_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint8x4_t, svint8_t,
+ z0 = svadd_single_s8_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svint8x4_t, svint8_t, z24,
+ svadd_single_s8_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint16x2_t, svuint16_t, z24,
+ svadd_single_u16_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** add {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint16x2_t, svuint16_t, z24,
+ svadd_single_u16_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint16x2_t, svuint16_t, z24,
+ svadd_single_u16_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint16x2_t, svuint16_t, z1,
+ svadd_single_u16_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint16x2_t, svuint16_t, z1,
+ svadd_single_u16_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint16x2_t, svuint16_t, z18,
+ svadd_single_u16_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint16x2_t, svuint16_t,
+ z0_res = svadd_single_u16_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint16x2_t, svuint16_t,
+ z0 = svadd_single_u16_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint16x2_t, svuint16_t, z24,
+ svadd_single_u16_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint16x4_t, svuint16_t, z24,
+ svadd_single_u16_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** add {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint16x4_t, svuint16_t, z24,
+ svadd_single_u16_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint16x4_t, svuint16_t, z24,
+ svadd_single_u16_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint16x4_t, svuint16_t, z1,
+ svadd_single_u16_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint16x4_t, svuint16_t, z1,
+ svadd_single_u16_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint16x4_t, svuint16_t, z18,
+ svadd_single_u16_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint16x4_t, svuint16_t,
+ z0_res = svadd_single_u16_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint16x4_t, svuint16_t,
+ z0 = svadd_single_u16_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint16x4_t, svuint16_t, z24,
+ svadd_single_u16_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint32x2_t, svuint32_t, z24,
+ svadd_single_u32_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** add {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint32x2_t, svuint32_t, z24,
+ svadd_single_u32_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint32x2_t, svuint32_t, z24,
+ svadd_single_u32_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint32x2_t, svuint32_t, z1,
+ svadd_single_u32_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint32x2_t, svuint32_t, z1,
+ svadd_single_u32_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint32x2_t, svuint32_t, z18,
+ svadd_single_u32_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint32x2_t, svuint32_t,
+ z0_res = svadd_single_u32_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint32x2_t, svuint32_t,
+ z0 = svadd_single_u32_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint32x2_t, svuint32_t, z24,
+ svadd_single_u32_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint32x4_t, svuint32_t, z24,
+ svadd_single_u32_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** add {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint32x4_t, svuint32_t, z24,
+ svadd_single_u32_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint32x4_t, svuint32_t, z24,
+ svadd_single_u32_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint32x4_t, svuint32_t, z1,
+ svadd_single_u32_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint32x4_t, svuint32_t, z1,
+ svadd_single_u32_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint32x4_t, svuint32_t, z18,
+ svadd_single_u32_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint32x4_t, svuint32_t,
+ z0_res = svadd_single_u32_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint32x4_t, svuint32_t,
+ z0 = svadd_single_u32_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint32x4_t, svuint32_t, z24,
+ svadd_single_u32_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint64x2_t, svuint64_t, z24,
+ svadd_single_u64_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** add {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint64x2_t, svuint64_t, z24,
+ svadd_single_u64_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint64x2_t, svuint64_t, z24,
+ svadd_single_u64_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint64x2_t, svuint64_t, z1,
+ svadd_single_u64_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint64x2_t, svuint64_t, z1,
+ svadd_single_u64_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint64x2_t, svuint64_t, z18,
+ svadd_single_u64_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint64x2_t, svuint64_t,
+ z0_res = svadd_single_u64_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint64x2_t, svuint64_t,
+ z0 = svadd_single_u64_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint64x2_t, svuint64_t, z24,
+ svadd_single_u64_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint64x4_t, svuint64_t, z24,
+ svadd_single_u64_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** add {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint64x4_t, svuint64_t, z24,
+ svadd_single_u64_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint64x4_t, svuint64_t, z24,
+ svadd_single_u64_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint64x4_t, svuint64_t, z1,
+ svadd_single_u64_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint64x4_t, svuint64_t, z1,
+ svadd_single_u64_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint64x4_t, svuint64_t, z18,
+ svadd_single_u64_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint64x4_t, svuint64_t,
+ z0_res = svadd_single_u64_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint64x4_t, svuint64_t,
+ z0 = svadd_single_u64_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint64x4_t, svuint64_t, z24,
+ svadd_single_u64_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint8x2_t, svuint8_t, z24,
+ svadd_single_u8_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** add {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint8x2_t, svuint8_t, z24,
+ svadd_single_u8_x2 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint8x2_t, svuint8_t, z24,
+ svadd_single_u8_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint8x2_t, svuint8_t, z1,
+ svadd_single_u8_x2 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint8x2_t, svuint8_t, z1,
+ svadd_single_u8_x2 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** add {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint8x2_t, svuint8_t, z18,
+ svadd_single_u8_x2 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint8x2_t, svuint8_t,
+ z0_res = svadd_single_u8_x2 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint8x2_t, svuint8_t,
+ z0 = svadd_single_u8_x2 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint8x2_t, svuint8_t, z24,
+ svadd_single_u8_x2 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_z24_z24_z0:
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z0, svuint8x4_t, svuint8_t, z24,
+ svadd_single_u8_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** add {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z28_z0, svuint8x4_t, svuint8_t, z24,
+ svadd_single_u8_x4 (z28, z0),
+ svadd (z28, z0))
+
+/*
+** add_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z1_z0, svuint8x4_t, svuint8_t, z24,
+ svadd_single_u8_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z1_z24_z0:
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z24_z0, svuint8x4_t, svuint8_t, z1,
+ svadd_single_u8_x4 (z24, z0),
+ svadd (z24, z0))
+
+/*
+** add_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z1_z1_z0, svuint8x4_t, svuint8_t, z1,
+ svadd_single_u8_x4 (z1, z0),
+ svadd (z1, z0))
+
+/*
+** add_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (add_z18_z18_z0, svuint8x4_t, svuint8_t, z18,
+ svadd_single_u8_x4 (z18, z0),
+ svadd (z18, z0))
+
+/*
+** add_awkward:
+** ...
+** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (add_awkward, svuint8x4_t, svuint8_t,
+ z0_res = svadd_single_u8_x4 (z1, z0),
+ z0_res = svadd (z1, z0))
+
+/*
+** add_z0_z0_z15:
+** ...
+** add {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint8x4_t, svuint8_t,
+ z0 = svadd_single_u8_x4 (z0, z15),
+ z0 = svadd (z0, z15))
+
+/*
+** add_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (add_z24_z24_z16, svuint8x4_t, svuint8_t, z24,
+ svadd_single_u8_x4 (z24, z16),
+ svadd (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (0, z0, z0),
+ svadd_write_za32_vg1x2 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w0, z0, z0),
+ svadd_write_za32_vg1x2 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8, z0, z4),
+ svadd_write_za32_vg1x2 (w8, z0, z4))
+
+/*
+** add_write_w8_z4_z18:
+** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z18, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8, z4, z18),
+ svadd_write_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
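+/* ("Misaligned" here means that the tuple does not start at a register number
+   that is a multiple of the vector count, e.g. z23 for an x2 tuple, so the
+   compiler must first copy the values into an aligned tuple.  The patterns
+   below do not match those moves or the registers chosen; an invalid tuple
+   would be rejected by the assembler.)  */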
+
+/*
+** add_write_w8_z23_z0:
+** ...
+** add za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z23_z0, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8, z23, z0),
+ svadd_write_za32_vg1x2 (w8, z23, z0))
+
+/*
+** add_write_w8_z18_z23:
+** ...
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z23, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8, z18, z23),
+ svadd_write_za32_vg1x2 (w8, z18, z23))
+
+/*
+** add_write_w8_z4_z28:
+** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z28, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8, z4, z28),
+ svadd_write_za32_vg1x2 (w8, z4, z28))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8 + 7, z4, z0),
+ svadd_write_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8 + 8, z4, z4),
+ svadd_write_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svint32x2_t,
+ svadd_write_za32_s32_vg1x2 (w8 - 1, z4, z0),
+ svadd_write_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (0, z1, z0),
+ svadd_write_za32_vg1x2 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w0, z1, z0),
+ svadd_write_za32_vg1x2 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w8, z1, z0),
+ svadd_write_za32_vg1x2 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w8 + 7, z1, z0),
+ svadd_write_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w8 + 8, z1, z0),
+ svadd_write_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w0 - 1, z1, z0),
+ svadd_write_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w8, z0, z15),
+ svadd_write_za32_vg1x2 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint32x2_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x2 (w8, z20, z16),
+ svadd_write_za32_vg1x2 (w8, z20, z16))
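+
+/* Not one of the matched tests above: a minimal usage sketch of the intrinsic
+   exercised by this file.  The function name and the uint32_t slice type are
+   assumptions made for illustration; the declarations come from
+   test_sme2_acle.h (arm_sme.h).  */
+void
+write_sum_to_za (uint32_t slice, svint32x2_t a, svint32x2_t b)
+  __arm_streaming __arm_inout("za")
+{
+  /* Expected to assemble to "add za.s[<slice>, 0, vgx2], {...}, {...}",
+     writing the element-wise sums of A and B to the selected ZA slices.  */
+  svadd_write_za32_s32_vg1x2 (slice, a, b);
+}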
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (0, z0, z0),
+ svadd_write_za32_vg1x4 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w0, z0, z0),
+ svadd_write_za32_vg1x4 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8, z0, z4),
+ svadd_write_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** add_write_w8_z0_z18:
+** ...
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z18, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8, z0, z18),
+ svadd_write_za32_vg1x4 (w8, z0, z18))
+
+/*
+** add_write_w8_z18_z28:
+** ...
+** add za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z28, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8, z18, z28),
+ svadd_write_za32_vg1x4 (w8, z18, z28))
+
+/*
+** add_write_w8_z28_z23:
+** ...
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z28_z23, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8, z28, z23),
+ svadd_write_za32_vg1x4 (w8, z28, z23))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8 + 7, z4, z0),
+ svadd_write_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8 + 8, z4, z4),
+ svadd_write_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svint32x4_t,
+ svadd_write_za32_s32_vg1x4 (w8 - 1, z4, z0),
+ svadd_write_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (0, z1, z0),
+ svadd_write_za32_vg1x4 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w0, z1, z0),
+ svadd_write_za32_vg1x4 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w8, z1, z0),
+ svadd_write_za32_vg1x4 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w8 + 7, z1, z0),
+ svadd_write_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w8 + 8, z1, z0),
+ svadd_write_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w0 - 1, z1, z0),
+ svadd_write_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w8, z0, z15),
+ svadd_write_za32_vg1x4 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint32x4_t, svint32_t,
+ svadd_write_single_za32_s32_vg1x4 (w8, z20, z16),
+ svadd_write_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (0, z0, z0),
+ svadd_write_za32_vg1x2 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w0, z0, z0),
+ svadd_write_za32_vg1x2 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8, z0, z4),
+ svadd_write_za32_vg1x2 (w8, z0, z4))
+
+/*
+** add_write_w8_z4_z18:
+** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z18, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8, z4, z18),
+ svadd_write_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** add_write_w8_z23_z0:
+** ...
+** add za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z23_z0, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8, z23, z0),
+ svadd_write_za32_vg1x2 (w8, z23, z0))
+
+/*
+** add_write_w8_z18_z23:
+** ...
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z23, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8, z18, z23),
+ svadd_write_za32_vg1x2 (w8, z18, z23))
+
+/*
+** add_write_w8_z4_z28:
+** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z28, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8, z4, z28),
+ svadd_write_za32_vg1x2 (w8, z4, z28))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8 + 7, z4, z0),
+ svadd_write_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8 + 8, z4, z4),
+ svadd_write_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svuint32x2_t,
+ svadd_write_za32_u32_vg1x2 (w8 - 1, z4, z0),
+ svadd_write_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (0, z1, z0),
+ svadd_write_za32_vg1x2 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w0, z1, z0),
+ svadd_write_za32_vg1x2 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w8, z1, z0),
+ svadd_write_za32_vg1x2 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w8 + 7, z1, z0),
+ svadd_write_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w8 + 8, z1, z0),
+ svadd_write_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w0 - 1, z1, z0),
+ svadd_write_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w8, z0, z15),
+ svadd_write_za32_vg1x2 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint32x2_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x2 (w8, z20, z16),
+ svadd_write_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (0, z0, z0),
+ svadd_write_za32_vg1x4 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w0, z0, z0),
+ svadd_write_za32_vg1x4 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8, z0, z4),
+ svadd_write_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** add_write_w8_z0_z18:
+** ...
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z18, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8, z0, z18),
+ svadd_write_za32_vg1x4 (w8, z0, z18))
+
+/*
+** add_write_w8_z18_z28:
+** ...
+** add za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z28, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8, z18, z28),
+ svadd_write_za32_vg1x4 (w8, z18, z28))
+
+/*
+** add_write_w8_z28_z23:
+** ...
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z28_z23, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8, z28, z23),
+ svadd_write_za32_vg1x4 (w8, z28, z23))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8 + 7, z4, z0),
+ svadd_write_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8 + 8, z4, z4),
+ svadd_write_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svuint32x4_t,
+ svadd_write_za32_u32_vg1x4 (w8 - 1, z4, z0),
+ svadd_write_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (0, z1, z0),
+ svadd_write_za32_vg1x4 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w0, z1, z0),
+ svadd_write_za32_vg1x4 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w8, z1, z0),
+ svadd_write_za32_vg1x4 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w8 + 7, z1, z0),
+ svadd_write_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w8 + 8, z1, z0),
+ svadd_write_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w0 - 1, z1, z0),
+ svadd_write_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w8, z0, z15),
+ svadd_write_za32_vg1x4 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint32x4_t, svuint32_t,
+ svadd_write_single_za32_u32_vg1x4 (w8, z20, z16),
+ svadd_write_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (0, z0, z0),
+ svadd_write_za64_vg1x2 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w0, z0, z0),
+ svadd_write_za64_vg1x2 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8, z0, z4),
+ svadd_write_za64_vg1x2 (w8, z0, z4))
+
+/*
+** add_write_w8_z4_z18:
+** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z18, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8, z4, z18),
+ svadd_write_za64_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** add_write_w8_z23_z0:
+** ...
+** add za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z23_z0, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8, z23, z0),
+ svadd_write_za64_vg1x2 (w8, z23, z0))
+
+/*
+** add_write_w8_z18_z23:
+** ...
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z23, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8, z18, z23),
+ svadd_write_za64_vg1x2 (w8, z18, z23))
+
+/*
+** add_write_w8_z4_z28:
+** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z28, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8, z4, z28),
+ svadd_write_za64_vg1x2 (w8, z4, z28))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8 + 7, z4, z0),
+ svadd_write_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8 + 8, z4, z4),
+ svadd_write_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svint64x2_t,
+ svadd_write_za64_s64_vg1x2 (w8 - 1, z4, z0),
+ svadd_write_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (0, z1, z0),
+ svadd_write_za64_vg1x2 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w0, z1, z0),
+ svadd_write_za64_vg1x2 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w8, z1, z0),
+ svadd_write_za64_vg1x2 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w8 + 7, z1, z0),
+ svadd_write_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w8 + 8, z1, z0),
+ svadd_write_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w0 - 1, z1, z0),
+ svadd_write_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w8, z0, z15),
+ svadd_write_za64_vg1x2 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint64x2_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x2 (w8, z20, z16),
+ svadd_write_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (0, z0, z0),
+ svadd_write_za64_vg1x4 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w0, z0, z0),
+ svadd_write_za64_vg1x4 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8, z0, z4),
+ svadd_write_za64_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** add_write_w8_z0_z18:
+** ...
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z18, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8, z0, z18),
+ svadd_write_za64_vg1x4 (w8, z0, z18))
+
+/*
+** add_write_w8_z18_z28:
+** ...
+** add za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z28, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8, z18, z28),
+ svadd_write_za64_vg1x4 (w8, z18, z28))
+
+/*
+** add_write_w8_z28_z23:
+** ...
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z28_z23, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8, z28, z23),
+ svadd_write_za64_vg1x4 (w8, z28, z23))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8 + 7, z4, z0),
+ svadd_write_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8 + 8, z4, z4),
+ svadd_write_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svint64x4_t,
+ svadd_write_za64_s64_vg1x4 (w8 - 1, z4, z0),
+ svadd_write_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (0, z1, z0),
+ svadd_write_za64_vg1x4 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w0, z1, z0),
+ svadd_write_za64_vg1x4 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w8, z1, z0),
+ svadd_write_za64_vg1x4 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w8 + 7, z1, z0),
+ svadd_write_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w8 + 8, z1, z0),
+ svadd_write_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w0 - 1, z1, z0),
+ svadd_write_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w8, z0, z15),
+ svadd_write_za64_vg1x4 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** add za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint64x4_t, svint64_t,
+ svadd_write_single_za64_s64_vg1x4 (w8, z20, z16),
+ svadd_write_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (0, z0, z0),
+ svadd_write_za64_vg1x2 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w0, z0, z0),
+ svadd_write_za64_vg1x2 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8, z0, z4),
+ svadd_write_za64_vg1x2 (w8, z0, z4))
+
+/*
+** add_write_w8_z4_z18:
+** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z18, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8, z4, z18),
+ svadd_write_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_write_w8_z23_z0:
+** ...
+** add za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z23_z0, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8, z23, z0),
+ svadd_write_za64_vg1x2 (w8, z23, z0))
+
+/*
+** add_write_w8_z18_z23:
+** ...
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z23, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8, z18, z23),
+ svadd_write_za64_vg1x2 (w8, z18, z23))
+
+/*
+** add_write_w8_z4_z28:
+** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z4_z28, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8, z4, z28),
+ svadd_write_za64_vg1x2 (w8, z4, z28))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8 + 7, z4, z0),
+ svadd_write_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8 + 8, z4, z4),
+ svadd_write_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svuint64x2_t,
+ svadd_write_za64_u64_vg1x2 (w8 - 1, z4, z0),
+ svadd_write_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (0, z1, z0),
+ svadd_write_za64_vg1x2 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w0, z1, z0),
+ svadd_write_za64_vg1x2 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w8, z1, z0),
+ svadd_write_za64_vg1x2 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w8 + 7, z1, z0),
+ svadd_write_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w8 + 8, z1, z0),
+ svadd_write_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w0 - 1, z1, z0),
+ svadd_write_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w8, z0, z15),
+ svadd_write_za64_vg1x2 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+**	mov	(z[0-7])\.d, z16\.d
+** add za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint64x2_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x2 (w8, z20, z16),
+ svadd_write_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_0_z0_z0, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (0, z0, z0),
+ svadd_write_za64_vg1x4 (0, z0, z0))
+
+/*
+** add_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w0_z0_z0, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w0, z0, z0),
+ svadd_write_za64_vg1x4 (w0, z0, z0))
+
+/*
+** add_write_w8_z0_z4:
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z4, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8, z0, z4),
+ svadd_write_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_write_w8_z0_z18:
+** ...
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z0_z18, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8, z0, z18),
+ svadd_write_za64_vg1x4 (w8, z0, z18))
+
+/*
+** add_write_w8_z18_z28:
+** ...
+** add za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z18_z28, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8, z18, z28),
+ svadd_write_za64_vg1x4 (w8, z18, z28))
+
+/*
+** add_write_w8_z28_z23:
+** ...
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_write_w8_z28_z23, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8, z28, z23),
+ svadd_write_za64_vg1x4 (w8, z28, z23))
+
+/*
+** add_write_w8p7_z4_z0:
+** add za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p7_z4_z0, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8 + 7, z4, z0),
+ svadd_write_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** add_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8p8_z4_z4, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8 + 8, z4, z4),
+ svadd_write_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** add_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_write_w8m1_z4_z0, svuint64x4_t,
+ svadd_write_za64_u64_vg1x4 (w8 - 1, z4, z0),
+ svadd_write_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** add_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (0, z1, z0),
+ svadd_write_za64_vg1x4 (0, z1, z0))
+
+/*
+** add_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w0, z1, z0),
+ svadd_write_za64_vg1x4 (w0, z1, z0))
+
+/*
+** add_write_single_w8_z1_z0:
+** add za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w8, z1, z0),
+ svadd_write_za64_vg1x4 (w8, z1, z0))
+
+/*
+** add_write_single_w8p7_z1_z0:
+** add za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w8 + 7, z1, z0),
+ svadd_write_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** add_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w8 + 8, z1, z0),
+ svadd_write_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** add_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w0 - 1, z1, z0),
+ svadd_write_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** add_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w8, z0, z15),
+ svadd_write_za64_vg1x4 (w8, z0, z15))
+
+/*
+** add_write_single_w8_z20_z16:
+**	mov	(z[0-7])\.d, z16\.d
+** add za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint64x4_t, svuint64_t,
+ svadd_write_single_za64_u64_vg1x4 (w8, z20, z16),
+ svadd_write_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (0, z0),
+ svadd_za32_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w0, z0),
+ svadd_za32_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w7, z0),
+ svadd_za32_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8, z0),
+ svadd_za32_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w11, z0),
+ svadd_za32_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w12, z0),
+ svadd_za32_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8 + 7, z0),
+ svadd_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8 + 8, z0),
+ svadd_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8 - 1, z0),
+ svadd_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** fadd za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8, z18),
+ svadd_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8, z23),
+ svadd_za32_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** fadd za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat32x2_t,
+ svadd_za32_f32_vg1x2 (w8, z28),
+ svadd_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (0, z0),
+ svadd_za32_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w0, z0),
+ svadd_za32_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w7, z0),
+ svadd_za32_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8, z0),
+ svadd_za32_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w11, z0),
+ svadd_za32_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w12, z0),
+ svadd_za32_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8 + 7, z0),
+ svadd_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8 + 8, z0),
+ svadd_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8 - 1, z0),
+ svadd_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** fadd za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8, z4),
+ svadd_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8, z18),
+ svadd_za32_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8, z23),
+ svadd_za32_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** fadd za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat32x4_t,
+ svadd_za32_f32_vg1x4 (w8, z28),
+ svadd_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (0, z0),
+ svadd_za32_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w0, z0),
+ svadd_za32_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w7, z0),
+ svadd_za32_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8, z0),
+ svadd_za32_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w11, z0),
+ svadd_za32_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w12, z0),
+ svadd_za32_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8 + 7, z0),
+ svadd_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8 + 8, z0),
+ svadd_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8 - 1, z0),
+ svadd_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8, z18),
+ svadd_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8, z23),
+ svadd_za32_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svint32x2_t,
+ svadd_za32_s32_vg1x2 (w8, z28),
+ svadd_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (0, z0),
+ svadd_za32_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w0, z0),
+ svadd_za32_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w7, z0),
+ svadd_za32_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8, z0),
+ svadd_za32_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w11, z0),
+ svadd_za32_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w12, z0),
+ svadd_za32_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8 + 7, z0),
+ svadd_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8 + 8, z0),
+ svadd_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8 - 1, z0),
+ svadd_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** add za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8, z4),
+ svadd_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8, z18),
+ svadd_za32_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8, z23),
+ svadd_za32_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svint32x4_t,
+ svadd_za32_s32_vg1x4 (w8, z28),
+ svadd_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (0, z0),
+ svadd_za32_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w0, z0),
+ svadd_za32_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w7, z0),
+ svadd_za32_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8, z0),
+ svadd_za32_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w11, z0),
+ svadd_za32_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w12, z0),
+ svadd_za32_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8 + 7, z0),
+ svadd_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8 + 8, z0),
+ svadd_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8 - 1, z0),
+ svadd_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8, z18),
+ svadd_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8, z23),
+ svadd_za32_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svuint32x2_t,
+ svadd_za32_u32_vg1x2 (w8, z28),
+ svadd_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (0, z0),
+ svadd_za32_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w0, z0),
+ svadd_za32_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w7, z0),
+ svadd_za32_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8, z0),
+ svadd_za32_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w11, z0),
+ svadd_za32_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w12, z0),
+ svadd_za32_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8 + 7, z0),
+ svadd_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8 + 8, z0),
+ svadd_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8 - 1, z0),
+ svadd_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** add za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8, z4),
+ svadd_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8, z18),
+ svadd_za32_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8, z23),
+ svadd_za32_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svuint32x4_t,
+ svadd_za32_u32_vg1x4 (w8, z28),
+ svadd_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (0, z0),
+ svadd_za64_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w0, z0),
+ svadd_za64_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w7, z0),
+ svadd_za64_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8, z0),
+ svadd_za64_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w11, z0),
+ svadd_za64_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w12, z0),
+ svadd_za64_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8 + 7, z0),
+ svadd_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8 + 8, z0),
+ svadd_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8 - 1, z0),
+ svadd_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** fadd za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8, z18),
+ svadd_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8, z23),
+ svadd_za64_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** fadd za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat64x2_t,
+ svadd_za64_f64_vg1x2 (w8, z28),
+ svadd_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (0, z0),
+ svadd_za64_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w0, z0),
+ svadd_za64_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w7, z0),
+ svadd_za64_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8, z0),
+ svadd_za64_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w11, z0),
+ svadd_za64_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w12, z0),
+ svadd_za64_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8 + 7, z0),
+ svadd_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8 + 8, z0),
+ svadd_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8 - 1, z0),
+ svadd_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** fadd za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8, z4),
+ svadd_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8, z18),
+ svadd_za64_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8, z23),
+ svadd_za64_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** fadd za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat64x4_t,
+ svadd_za64_f64_vg1x4 (w8, z28),
+ svadd_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (0, z0),
+ svadd_za64_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w0, z0),
+ svadd_za64_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w7, z0),
+ svadd_za64_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8, z0),
+ svadd_za64_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w11, z0),
+ svadd_za64_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w12, z0),
+ svadd_za64_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8 + 7, z0),
+ svadd_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8 + 8, z0),
+ svadd_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8 - 1, z0),
+ svadd_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8, z18),
+ svadd_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8, z23),
+ svadd_za64_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svint64x2_t,
+ svadd_za64_s64_vg1x2 (w8, z28),
+ svadd_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (0, z0),
+ svadd_za64_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w0, z0),
+ svadd_za64_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w7, z0),
+ svadd_za64_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8, z0),
+ svadd_za64_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w11, z0),
+ svadd_za64_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w12, z0),
+ svadd_za64_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8 + 7, z0),
+ svadd_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8 + 8, z0),
+ svadd_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8 - 1, z0),
+ svadd_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** add za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8, z4),
+ svadd_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8, z18),
+ svadd_za64_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8, z23),
+ svadd_za64_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svint64x4_t,
+ svadd_za64_s64_vg1x4 (w8, z28),
+ svadd_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (0, z0),
+ svadd_za64_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w0, z0),
+ svadd_za64_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w7, z0),
+ svadd_za64_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8, z0),
+ svadd_za64_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w11, z0),
+ svadd_za64_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w12, z0),
+ svadd_za64_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8 + 7, z0),
+ svadd_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8 + 8, z0),
+ svadd_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8 - 1, z0),
+ svadd_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8, z18),
+ svadd_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8, z23),
+ svadd_za64_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svuint64x2_t,
+ svadd_za64_u64_vg1x2 (w8, z28),
+ svadd_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (0, z0),
+ svadd_za64_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w0, z0),
+ svadd_za64_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w7, z0),
+ svadd_za64_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8, z0),
+ svadd_za64_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** add za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w11, z0),
+ svadd_za64_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w12, z0),
+ svadd_za64_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** add za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8 + 7, z0),
+ svadd_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8 + 8, z0),
+ svadd_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8 - 1, z0),
+ svadd_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** add za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8, z4),
+ svadd_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8, z18),
+ svadd_za64_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8, z23),
+ svadd_za64_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svuint64x4_t,
+ svadd_za64_u64_vg1x4 (w8, z28),
+ svadd_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bfmlalb_f32_tied1:
+** bfmlalb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_f32 (z0, z4, z5),
+ z0 = svbfmlalb (z0, z4, z5))
+
+/*
+** bfmlalb_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_f32 (z4, z0, z1),
+ z0_res = svbfmlalb (z4, z0, z1))
+
+/*
+** bfmlalb_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_f32 (z4, z1, z0),
+ z0_res = svbfmlalb (z4, z1, z0))
+
+/*
+** bfmlalb_f32_untied:
+** movprfx z0, z1
+** bfmlalb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_f32 (z1, z4, z5),
+ z0 = svbfmlalb (z1, z4, z5))
+
+/*
+** bfmlalb_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlalb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalb_n_f32 (z0, z4, d7),
+ z0 = svbfmlalb (z0, z4, d7))
+
+/*
+** bfmlalb_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlalb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalb_n_f32 (z1, z4, d7),
+ z0 = svbfmlalb (z1, z4, d7))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bfmlslb_lane_0_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslb_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslb_lane_1_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslb_lane_7_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslb_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslb_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslb_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bfmlslt_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z0, z4, z5),
+ z0 = svbfmlslt (z0, z4, z5))
+
+/*
+** bfmlslt_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z0, z1),
+ z0_res = svbfmlslt (z4, z0, z1))
+
+/*
+** bfmlslt_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z1, z0),
+ z0_res = svbfmlslt (z4, z1, z0))
+
+/*
+** bfmlslt_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z1, z4, z5),
+ z0 = svbfmlslt (z1, z4, z5))
+
+/*
+** bfmlslt_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z0, z4, d7),
+ z0 = svbfmlslt (z0, z4, d7))
+
+/*
+** bfmlslt_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z1, z4, d7),
+ z0 = svbfmlslt (z1, z4, d7))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bfmlslt_lane_0_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslt_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslt_lane_1_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslt_lane_7_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslt_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslt_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslt_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bmopa_za32_u32_0_p0_p1_z0_z1:
+** bmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmopa_za32_u32_0_p0_p1_z0_z1, svuint32_t,
+ svbmopa_za32_u32_m (0, p0, p1, z0, z1),
+ svbmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** bmopa_za32_u32_0_p1_p0_z1_z0:
+** bmopa za0\.s, p1/m, p0/m, z1\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmopa_za32_u32_0_p1_p0_z1_z0, svuint32_t,
+ svbmopa_za32_u32_m (0, p1, p0, z1, z0),
+ svbmopa_za32_m (0, p1, p0, z1, z0))
+
+/*
+** bmopa_za32_u32_3_p0_p1_z0_z1:
+** bmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmopa_za32_u32_3_p0_p1_z0_z1, svuint32_t,
+ svbmopa_za32_u32_m (3, p0, p1, z0, z1),
+ svbmopa_za32_m (3, p0, p1, z0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** bmops_za32_u32_0_p0_p1_z0_z1:
+** bmops za0\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmops_za32_u32_0_p0_p1_z0_z1, svuint32_t,
+ svbmops_za32_u32_m (0, p0, p1, z0, z1),
+ svbmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** bmops_za32_u32_0_p1_p0_z1_z0:
+** bmops za0\.s, p1/m, p0/m, z1\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmops_za32_u32_0_p1_p0_z1_z0, svuint32_t,
+ svbmops_za32_u32_m (0, p1, p0, z1, z0),
+ svbmops_za32_m (0, p1, p0, z1, z0))
+
+/*
+** bmops_za32_u32_3_p0_p1_z0_z1:
+** bmops za3\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (bmops_za32_u32_3_p0_p1_z0_z1, svuint32_t,
+ svbmops_za32_u32_m (3, p0, p1, z0, z1),
+ svbmops_za32_m (3, p0, p1, z0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_f16_tied1:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied1, svfloat16_t,
+ z0 = svclamp_f16 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f16_tied2:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied2, svfloat16_t,
+ z0 = svclamp_f16 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f16_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, \1\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied3, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f16_untied:
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, z3\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_untied, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.h - z25\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat16x2_t, svfloat16_t, z24,
+ svclamp_single_f16_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.h - z25\.h}, z5\.h, z7\.h
+** |
+** fclamp {z28\.h - z29\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat16x2_t, svfloat16_t, z24,
+ svclamp_single_f16_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fclamp {z24\.h - z25\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat16x2_t, svfloat16_t, z24,
+ svclamp_single_f16_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.h - z25\.h}, z16\.h, z23\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat16x2_t, svfloat16_t, z1,
+ svclamp_single_f16_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat16x2_t, svfloat16_t, z1,
+ svclamp_single_f16_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** fclamp {z18\.h - z19\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat16x2_t, svfloat16_t, z18,
+ svclamp_single_f16_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svclamp_single_f16_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.h - z27\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat16x4_t, svfloat16_t, z24,
+ svclamp_single_f16_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.h - z27\.h}, z5\.h, z7\.h
+** |
+** fclamp {z28\.h - z31\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat16x4_t, svfloat16_t, z24,
+ svclamp_single_f16_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.h - z27\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat16x4_t, svfloat16_t, z24,
+ svclamp_single_f16_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.h - z27\.h}, z16\.h, z23\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat16x4_t, svfloat16_t, z1,
+ svclamp_single_f16_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat16x4_t, svfloat16_t, z1,
+ svclamp_single_f16_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat16x4_t, svfloat16_t, z18,
+ svclamp_single_f16_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svclamp_single_f16_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_f32_tied1:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied1, svfloat32_t,
+ z0 = svclamp_f32 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f32_tied2:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied2, svfloat32_t,
+ z0 = svclamp_f32 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied3, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f32_untied:
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, z3\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_untied, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.s - z25\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat32x2_t, svfloat32_t, z24,
+ svclamp_single_f32_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.s - z25\.s}, z5\.s, z7\.s
+** |
+** fclamp {z28\.s - z29\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat32x2_t, svfloat32_t, z24,
+ svclamp_single_f32_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fclamp {z24\.s - z25\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat32x2_t, svfloat32_t, z24,
+ svclamp_single_f32_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.s - z25\.s}, z16\.s, z23\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat32x2_t, svfloat32_t, z1,
+ svclamp_single_f32_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat32x2_t, svfloat32_t, z1,
+ svclamp_single_f32_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** fclamp {z18\.s - z19\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat32x2_t, svfloat32_t, z18,
+ svclamp_single_f32_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svclamp_single_f32_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.s - z27\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat32x4_t, svfloat32_t, z24,
+ svclamp_single_f32_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.s - z27\.s}, z5\.s, z7\.s
+** |
+** fclamp {z28\.s - z31\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat32x4_t, svfloat32_t, z24,
+ svclamp_single_f32_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.s - z27\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat32x4_t, svfloat32_t, z24,
+ svclamp_single_f32_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.s - z27\.s}, z16\.s, z23\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat32x4_t, svfloat32_t, z1,
+ svclamp_single_f32_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat32x4_t, svfloat32_t, z1,
+ svclamp_single_f32_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat32x4_t, svfloat32_t, z18,
+ svclamp_single_f32_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svclamp_single_f32_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_f64_tied1:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied1, svfloat64_t,
+ z0 = svclamp_f64 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f64_tied2:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied2, svfloat64_t,
+ z0 = svclamp_f64 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f64_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, \1\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied3, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f64_untied:
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, z3\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_untied, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.d - z25\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat64x2_t, svfloat64_t, z24,
+ svclamp_single_f64_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.d - z25\.d}, z5\.d, z7\.d
+** |
+** fclamp {z28\.d - z29\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat64x2_t, svfloat64_t, z24,
+ svclamp_single_f64_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fclamp {z24\.d - z25\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat64x2_t, svfloat64_t, z24,
+ svclamp_single_f64_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.d - z25\.d}, z16\.d, z23\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat64x2_t, svfloat64_t, z1,
+ svclamp_single_f64_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat64x2_t, svfloat64_t, z1,
+ svclamp_single_f64_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** fclamp {z18\.d - z19\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat64x2_t, svfloat64_t, z18,
+ svclamp_single_f64_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svclamp_single_f64_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** fclamp {z24\.d - z27\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat64x4_t, svfloat64_t, z24,
+ svclamp_single_f64_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.d - z27\.d}, z5\.d, z7\.d
+** |
+** fclamp {z28\.d - z31\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat64x4_t, svfloat64_t, z24,
+ svclamp_single_f64_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z24\.d - z27\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat64x4_t, svfloat64_t, z24,
+ svclamp_single_f64_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** fclamp {z24\.d - z27\.d}, z16\.d, z23\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat64x4_t, svfloat64_t, z1,
+ svclamp_single_f64_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat64x4_t, svfloat64_t, z1,
+ svclamp_single_f64_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat64x4_t, svfloat64_t, z18,
+ svclamp_single_f64_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svclamp_single_f64_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.h - z25\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint16x2_t, svint16_t, z24,
+ svclamp_single_s16_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.h - z25\.h}, z5\.h, z7\.h
+** |
+** sclamp {z28\.h - z29\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint16x2_t, svint16_t, z24,
+ svclamp_single_s16_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sclamp {z24\.h - z25\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint16x2_t, svint16_t, z24,
+ svclamp_single_s16_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.h - z25\.h}, z16\.h, z23\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint16x2_t, svint16_t, z1,
+ svclamp_single_s16_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint16x2_t, svint16_t, z1,
+ svclamp_single_s16_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** sclamp {z18\.h - z19\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint16x2_t, svint16_t, z18,
+ svclamp_single_s16_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint16x2_t, svint16_t,
+ z0_res = svclamp_single_s16_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.h - z27\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint16x4_t, svint16_t, z24,
+ svclamp_single_s16_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.h - z27\.h}, z5\.h, z7\.h
+** |
+** sclamp {z28\.h - z31\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint16x4_t, svint16_t, z24,
+ svclamp_single_s16_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.h - z27\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint16x4_t, svint16_t, z24,
+ svclamp_single_s16_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.h - z27\.h}, z16\.h, z23\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint16x4_t, svint16_t, z1,
+ svclamp_single_s16_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint16x4_t, svint16_t, z1,
+ svclamp_single_s16_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint16x4_t, svint16_t, z18,
+ svclamp_single_s16_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint16x4_t, svint16_t,
+ z0_res = svclamp_single_s16_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.s - z25\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint32x2_t, svint32_t, z24,
+ svclamp_single_s32_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.s - z25\.s}, z5\.s, z7\.s
+** |
+** sclamp {z28\.s - z29\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint32x2_t, svint32_t, z24,
+ svclamp_single_s32_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sclamp {z24\.s - z25\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint32x2_t, svint32_t, z24,
+ svclamp_single_s32_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.s - z25\.s}, z16\.s, z23\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint32x2_t, svint32_t, z1,
+ svclamp_single_s32_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint32x2_t, svint32_t, z1,
+ svclamp_single_s32_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** sclamp {z18\.s - z19\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint32x2_t, svint32_t, z18,
+ svclamp_single_s32_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint32x2_t, svint32_t,
+ z0_res = svclamp_single_s32_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.s - z27\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint32x4_t, svint32_t, z24,
+ svclamp_single_s32_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.s - z27\.s}, z5\.s, z7\.s
+** |
+** sclamp {z28\.s - z31\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint32x4_t, svint32_t, z24,
+ svclamp_single_s32_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.s - z27\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint32x4_t, svint32_t, z24,
+ svclamp_single_s32_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.s - z27\.s}, z16\.s, z23\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint32x4_t, svint32_t, z1,
+ svclamp_single_s32_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint32x4_t, svint32_t, z1,
+ svclamp_single_s32_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint32x4_t, svint32_t, z18,
+ svclamp_single_s32_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint32x4_t, svint32_t,
+ z0_res = svclamp_single_s32_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.d - z25\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint64x2_t, svint64_t, z24,
+ svclamp_single_s64_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.d - z25\.d}, z5\.d, z7\.d
+** |
+** sclamp {z28\.d - z29\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint64x2_t, svint64_t, z24,
+ svclamp_single_s64_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sclamp {z24\.d - z25\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint64x2_t, svint64_t, z24,
+ svclamp_single_s64_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.d - z25\.d}, z16\.d, z23\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint64x2_t, svint64_t, z1,
+ svclamp_single_s64_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint64x2_t, svint64_t, z1,
+ svclamp_single_s64_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** sclamp {z18\.d - z19\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint64x2_t, svint64_t, z18,
+ svclamp_single_s64_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint64x2_t, svint64_t,
+ z0_res = svclamp_single_s64_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.d - z27\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint64x4_t, svint64_t, z24,
+ svclamp_single_s64_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.d - z27\.d}, z5\.d, z7\.d
+** |
+** sclamp {z28\.d - z31\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint64x4_t, svint64_t, z24,
+ svclamp_single_s64_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.d - z27\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint64x4_t, svint64_t, z24,
+ svclamp_single_s64_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.d - z27\.d}, z16\.d, z23\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint64x4_t, svint64_t, z1,
+ svclamp_single_s64_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint64x4_t, svint64_t, z1,
+ svclamp_single_s64_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint64x4_t, svint64_t, z18,
+ svclamp_single_s64_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint64x4_t, svint64_t,
+ z0_res = svclamp_single_s64_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.b - z25\.b}, z0\.b, z5\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint8x2_t, svint8_t, z24,
+ svclamp_single_s8_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.b - z25\.b}, z5\.b, z7\.b
+** |
+** sclamp {z28\.b - z29\.b}, z5\.b, z7\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint8x2_t, svint8_t, z24,
+ svclamp_single_s8_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sclamp {z24\.b - z25\.b}, z7\.b, z16\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint8x2_t, svint8_t, z24,
+ svclamp_single_s8_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.b - z25\.b}, z16\.b, z23\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint8x2_t, svint8_t, z1,
+ svclamp_single_s8_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint8x2_t, svint8_t, z1,
+ svclamp_single_s8_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** sclamp {z18\.b - z19\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint8x2_t, svint8_t, z18,
+ svclamp_single_s8_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z3\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint8x2_t, svint8_t,
+ z0_res = svclamp_single_s8_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** sclamp {z24\.b - z27\.b}, z0\.b, z5\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint8x4_t, svint8_t, z24,
+ svclamp_single_s8_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.b - z27\.b}, z5\.b, z7\.b
+** |
+** sclamp {z28\.b - z31\.b}, z5\.b, z7\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint8x4_t, svint8_t, z24,
+ svclamp_single_s8_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z24\.b - z27\.b}, z7\.b, z16\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint8x4_t, svint8_t, z24,
+ svclamp_single_s8_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** sclamp {z24\.b - z27\.b}, z16\.b, z23\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint8x4_t, svint8_t, z1,
+ svclamp_single_s8_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint8x4_t, svint8_t, z1,
+ svclamp_single_s8_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z16\.b, z5\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint8x4_t, svint8_t, z18,
+ svclamp_single_s8_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z5\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint8x4_t, svint8_t,
+ z0_res = svclamp_single_s8_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.h - z25\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint16x2_t, svuint16_t, z24,
+ svclamp_single_u16_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.h - z25\.h}, z5\.h, z7\.h
+** |
+** uclamp {z28\.h - z29\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint16x2_t, svuint16_t, z24,
+ svclamp_single_u16_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** uclamp {z24\.h - z25\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint16x2_t, svuint16_t, z24,
+ svclamp_single_u16_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.h - z25\.h}, z16\.h, z23\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint16x2_t, svuint16_t, z1,
+ svclamp_single_u16_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint16x2_t, svuint16_t, z1,
+ svclamp_single_u16_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** uclamp {z18\.h - z19\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint16x2_t, svuint16_t, z18,
+ svclamp_single_u16_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint16x2_t, svuint16_t,
+ z0_res = svclamp_single_u16_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.h - z27\.h}, z0\.h, z5\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint16x4_t, svuint16_t, z24,
+ svclamp_single_u16_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.h - z27\.h}, z5\.h, z7\.h
+** |
+** uclamp {z28\.h - z31\.h}, z5\.h, z7\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint16x4_t, svuint16_t, z24,
+ svclamp_single_u16_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.h - z27\.h}, z7\.h, z16\.h
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint16x4_t, svuint16_t, z24,
+ svclamp_single_u16_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.h - z27\.h}, z16\.h, z23\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint16x4_t, svuint16_t, z1,
+ svclamp_single_u16_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint16x4_t, svuint16_t, z1,
+ svclamp_single_u16_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint16x4_t, svuint16_t, z18,
+ svclamp_single_u16_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint16x4_t, svuint16_t,
+ z0_res = svclamp_single_u16_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.s - z25\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint32x2_t, svuint32_t, z24,
+ svclamp_single_u32_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.s - z25\.s}, z5\.s, z7\.s
+** |
+** uclamp {z28\.s - z29\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint32x2_t, svuint32_t, z24,
+ svclamp_single_u32_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** uclamp {z24\.s - z25\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint32x2_t, svuint32_t, z24,
+ svclamp_single_u32_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.s - z25\.s}, z16\.s, z23\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint32x2_t, svuint32_t, z1,
+ svclamp_single_u32_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint32x2_t, svuint32_t, z1,
+ svclamp_single_u32_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** uclamp {z18\.s - z19\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint32x2_t, svuint32_t, z18,
+ svclamp_single_u32_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint32x2_t, svuint32_t,
+ z0_res = svclamp_single_u32_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.s - z27\.s}, z0\.s, z5\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint32x4_t, svuint32_t, z24,
+ svclamp_single_u32_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.s - z27\.s}, z5\.s, z7\.s
+** |
+** uclamp {z28\.s - z31\.s}, z5\.s, z7\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint32x4_t, svuint32_t, z24,
+ svclamp_single_u32_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.s - z27\.s}, z7\.s, z16\.s
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint32x4_t, svuint32_t, z24,
+ svclamp_single_u32_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.s - z27\.s}, z16\.s, z23\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint32x4_t, svuint32_t, z1,
+ svclamp_single_u32_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint32x4_t, svuint32_t, z1,
+ svclamp_single_u32_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint32x4_t, svuint32_t, z18,
+ svclamp_single_u32_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint32x4_t, svuint32_t,
+ z0_res = svclamp_single_u32_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.d - z25\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint64x2_t, svuint64_t, z24,
+ svclamp_single_u64_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.d - z25\.d}, z5\.d, z7\.d
+** |
+** uclamp {z28\.d - z29\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint64x2_t, svuint64_t, z24,
+ svclamp_single_u64_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** uclamp {z24\.d - z25\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint64x2_t, svuint64_t, z24,
+ svclamp_single_u64_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.d - z25\.d}, z16\.d, z23\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint64x2_t, svuint64_t, z1,
+ svclamp_single_u64_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint64x2_t, svuint64_t, z1,
+ svclamp_single_u64_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** uclamp {z18\.d - z19\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint64x2_t, svuint64_t, z18,
+ svclamp_single_u64_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint64x2_t, svuint64_t,
+ z0_res = svclamp_single_u64_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.d - z27\.d}, z0\.d, z5\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint64x4_t, svuint64_t, z24,
+ svclamp_single_u64_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.d - z27\.d}, z5\.d, z7\.d
+** |
+** uclamp {z28\.d - z31\.d}, z5\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint64x4_t, svuint64_t, z24,
+ svclamp_single_u64_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.d - z27\.d}, z7\.d, z16\.d
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint64x4_t, svuint64_t, z24,
+ svclamp_single_u64_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.d - z27\.d}, z16\.d, z23\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint64x4_t, svuint64_t, z1,
+ svclamp_single_u64_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint64x4_t, svuint64_t, z1,
+ svclamp_single_u64_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint64x4_t, svuint64_t, z18,
+ svclamp_single_u64_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint64x4_t, svuint64_t,
+ z0_res = svclamp_single_u64_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.b - z25\.b}, z0\.b, z5\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint8x2_t, svuint8_t, z24,
+ svclamp_single_u8_x2 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.b - z25\.b}, z5\.b, z7\.b
+** |
+** uclamp {z28\.b - z29\.b}, z5\.b, z7\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint8x2_t, svuint8_t, z24,
+ svclamp_single_u8_x2 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** uclamp {z24\.b - z25\.b}, z7\.b, z16\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint8x2_t, svuint8_t, z24,
+ svclamp_single_u8_x2 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.b - z25\.b}, z16\.b, z23\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint8x2_t, svuint8_t, z1,
+ svclamp_single_u8_x2 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint8x2_t, svuint8_t, z1,
+ svclamp_single_u8_x2 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z0_z23:
+** uclamp {z18\.b - z19\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint8x2_t, svuint8_t, z18,
+ svclamp_single_u8_x2 (z18, z0, z23),
+ svclamp (z18, z0, z23))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z3\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint8x2_t, svuint8_t,
+ z0_res = svclamp_single_u8_x2 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** clamp_z24_z24_z0_z5:
+** uclamp {z24\.b - z27\.b}, z0\.b, z5\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint8x4_t, svuint8_t, z24,
+ svclamp_single_u8_x4 (z24, z0, z5),
+ svclamp (z24, z0, z5))
+
+/*
+** clamp_z24_z28_z5_z7:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.b - z27\.b}, z5\.b, z7\.b
+** |
+** uclamp {z28\.b - z31\.b}, z5\.b, z7\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint8x4_t, svuint8_t, z24,
+ svclamp_single_u8_x4 (z28, z5, z7),
+ svclamp (z28, z5, z7))
+
+/*
+** clamp_z24_z1_z7_z16:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z24\.b - z27\.b}, z7\.b, z16\.b
+** ret
+*/
+TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint8x4_t, svuint8_t, z24,
+ svclamp_single_u8_x4 (z1, z7, z16),
+ svclamp (z1, z7, z16))
+
+/*
+** clamp_z1_z24_z16_z23:
+** uclamp {z24\.b - z27\.b}, z16\.b, z23\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint8x4_t, svuint8_t, z1,
+ svclamp_single_u8_x4 (z24, z16, z23),
+ svclamp (z24, z16, z23))
+
+/*
+** clamp_z1_z1_z23_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint8x4_t, svuint8_t, z1,
+ svclamp_single_u8_x4 (z1, z23, z0),
+ svclamp (z1, z23, z0))
+
+/*
+** clamp_z18_z18_z16_z5:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z16\.b, z5\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint8x4_t, svuint8_t, z18,
+ svclamp_single_u8_x4 (z18, z16, z5),
+ svclamp (z18, z16, z5))
+
+/*
+** clamp_awkward:
+** ...
+** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z5\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint8x4_t, svuint8_t,
+ z0_res = svclamp_single_u8_x4 (z1, z0, zn),
+ z0_res = svclamp (z1, z0, zn))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c16 (pn0, 2),
+ x0 = svcntp_c16 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c16 (pn7, 4),
+ x15 = svcntp_c16 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c16 (pn8, 2),
+ x17 = svcntp_c16 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c16 (pn15, 4),
+ x0 = svcntp_c16 (pn15, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c32 (pn0, 2),
+ x0 = svcntp_c32 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c32 (pn7, 4),
+ x15 = svcntp_c32 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c32 (pn8, 2),
+ x17 = svcntp_c32 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c32 (pn15, 4),
+ x0 = svcntp_c32 (pn15, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c64 (pn0, 2),
+ x0 = svcntp_c64 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c64 (pn7, 4),
+ x15 = svcntp_c64 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c64 (pn8, 2),
+ x17 = svcntp_c64 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c64 (pn15, 4),
+ x0 = svcntp_c64 (pn15, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c8 (pn0, 2),
+ x0 = svcntp_c8 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c8 (pn7, 4),
+ x15 = svcntp_c8 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c8 (pn8, 2),
+ x17 = svcntp_c8 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c8 (pn15, 4),
+ x0 = svcntp_c8 (pn15, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z0:
+** bfcvt z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvt_bf16_f32_x2 (z0),
+ z0_res = svcvt_bf16 (z0))
+
+/*
+** cvt_z0_z6:
+** bfcvt z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvt_bf16_f32_x2 (z6),
+ z0_res = svcvt_bf16 (z6))
+
+/*
+** cvt_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** bfcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvt_bf16_f32_x2 (z29),
+ z0_res = svcvt_bf16 (z29))
+
+/*
+** cvt_z5_z0:
+** bfcvt z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svfloat32x2_t, svbfloat16_t,
+ z5 = svcvt_bf16_f32_x2 (z0),
+ z5 = svcvt_bf16 (z0))
+
+/*
+** cvt_z22_z16:
+** bfcvt z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svfloat32x2_t, svbfloat16_t,
+ z22 = svcvt_bf16_f32_x2 (z16),
+ z22 = svcvt_bf16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z0:
+** fcvt z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvt_f16_f32_x2 (z0),
+ z0_res = svcvt_f16 (z0))
+
+/*
+** cvt_z0_z6:
+** fcvt z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvt_f16_f32_x2 (z6),
+ z0_res = svcvt_f16 (z6))
+
+/*
+** cvt_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** fcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvt_f16_f32_x2 (z29),
+ z0_res = svcvt_f16 (z29))
+
+/*
+** cvt_z5_z0:
+** fcvt z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svfloat32x2_t, svfloat16_t,
+ z5 = svcvt_f16_f32_x2 (z0),
+ z5 = svcvt_f16 (z0))
+
+/*
+** cvt_z22_z16:
+** fcvt z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svfloat32x2_t, svfloat16_t,
+ z22 = svcvt_f16_f32_x2 (z16),
+ z22 = svcvt_f16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** scvtf {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svfloat32x2_t, svint32x2_t, z0,
+ svcvt_f32_s32_x2 (z4),
+ svcvt_f32 (z4))
+
+/*
+** cvt_z4_z0:
+** scvtf {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svint32x2_t, svfloat32x2_t, z4,
+ svcvt_f32_s32_x2 (z0),
+ svcvt_f32 (z0))
+
+/*
+** cvt_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** scvtf {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z18_z23, svfloat32x2_t, svint32x2_t, z18,
+ svcvt_f32_s32_x2 (z23),
+ svcvt_f32 (z23))
+
+/*
+** cvt_z23_z28:
+** scvtf [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svint32x2_t, svfloat32x2_t, z23,
+ svcvt_f32_s32_x2 (z28),
+ svcvt_f32 (z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** scvtf {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svfloat32x4_t, svint32x4_t, z0,
+ svcvt_f32_s32_x4 (z4),
+ svcvt_f32 (z4))
+
+/*
+** cvt_z4_z0:
+** scvtf {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svint32x4_t, svfloat32x4_t, z4,
+ svcvt_f32_s32_x4 (z0),
+ svcvt_f32 (z0))
+
+/*
+** cvt_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** scvtf {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z18, svint32x4_t, svfloat32x4_t, z4,
+ svcvt_f32_s32_x4 (z18),
+ svcvt_f32 (z18))
+
+/*
+** cvt_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** scvtf {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z28_z23, svfloat32x4_t, svint32x4_t, z28,
+ svcvt_f32_s32_x4 (z23),
+ svcvt_f32 (z23))
+
+/*
+** cvt_z23_z28:
+** scvtf [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svint32x4_t, svfloat32x4_t, z23,
+ svcvt_f32_s32_x4 (z28),
+ svcvt_f32 (z28))
+
+/*
+** cvt_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** scvtf {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z18, svint32x4_t, svfloat32x4_t, z23,
+ svcvt_f32_s32_x4 (z18),
+ svcvt_f32 (z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** ucvtf {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svfloat32x2_t, svuint32x2_t, z0,
+ svcvt_f32_u32_x2 (z4),
+ svcvt_f32 (z4))
+
+/*
+** cvt_z4_z0:
+** ucvtf {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svuint32x2_t, svfloat32x2_t, z4,
+ svcvt_f32_u32_x2 (z0),
+ svcvt_f32 (z0))
+
+/*
+** cvt_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** ucvtf {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z18_z23, svfloat32x2_t, svuint32x2_t, z18,
+ svcvt_f32_u32_x2 (z23),
+ svcvt_f32 (z23))
+
+/*
+** cvt_z23_z28:
+** ucvtf [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svuint32x2_t, svfloat32x2_t, z23,
+ svcvt_f32_u32_x2 (z28),
+ svcvt_f32 (z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** ucvtf {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svfloat32x4_t, svuint32x4_t, z0,
+ svcvt_f32_u32_x4 (z4),
+ svcvt_f32 (z4))
+
+/*
+** cvt_z4_z0:
+** ucvtf {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svuint32x4_t, svfloat32x4_t, z4,
+ svcvt_f32_u32_x4 (z0),
+ svcvt_f32 (z0))
+
+/*
+** cvt_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ucvtf {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z18, svuint32x4_t, svfloat32x4_t, z4,
+ svcvt_f32_u32_x4 (z18),
+ svcvt_f32 (z18))
+
+/*
+** cvt_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ucvtf {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z28_z23, svfloat32x4_t, svuint32x4_t, z28,
+ svcvt_f32_u32_x4 (z23),
+ svcvt_f32 (z23))
+
+/*
+** cvt_z23_z28:
+** ucvtf [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svuint32x4_t, svfloat32x4_t, z23,
+ svcvt_f32_u32_x4 (z28),
+ svcvt_f32 (z28))
+
+/*
+** cvt_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ucvtf {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z18, svuint32x4_t, svfloat32x4_t, z23,
+ svcvt_f32_u32_x4 (z18),
+ svcvt_f32 (z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** fcvtzs {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svint32x2_t, svfloat32x2_t, z0,
+ svcvt_s32_f32_x2 (z4),
+ svcvt_s32 (z4))
+
+/*
+** cvt_z4_z0:
+** fcvtzs {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svfloat32x2_t, svint32x2_t, z4,
+ svcvt_s32_f32_x2 (z0),
+ svcvt_s32 (z0))
+
+/*
+** cvt_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzs {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z18_z23, svint32x2_t, svfloat32x2_t, z18,
+ svcvt_s32_f32_x2 (z23),
+ svcvt_s32 (z23))
+
+/*
+** cvt_z23_z28:
+** fcvtzs [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svfloat32x2_t, svint32x2_t, z23,
+ svcvt_s32_f32_x2 (z28),
+ svcvt_s32 (z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** fcvtzs {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svint32x4_t, svfloat32x4_t, z0,
+ svcvt_s32_f32_x4 (z4),
+ svcvt_s32 (z4))
+
+/*
+** cvt_z4_z0:
+** fcvtzs {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svfloat32x4_t, svint32x4_t, z4,
+ svcvt_s32_f32_x4 (z0),
+ svcvt_s32 (z0))
+
+/*
+** cvt_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzs {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z18, svfloat32x4_t, svint32x4_t, z4,
+ svcvt_s32_f32_x4 (z18),
+ svcvt_s32 (z18))
+
+/*
+** cvt_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzs {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z28_z23, svint32x4_t, svfloat32x4_t, z28,
+ svcvt_s32_f32_x4 (z23),
+ svcvt_s32 (z23))
+
+/*
+** cvt_z23_z28:
+** fcvtzs [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svfloat32x4_t, svint32x4_t, z23,
+ svcvt_s32_f32_x4 (z28),
+ svcvt_s32 (z28))
+
+/*
+** cvt_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzs {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z18, svfloat32x4_t, svint32x4_t, z23,
+ svcvt_s32_f32_x4 (z18),
+ svcvt_s32 (z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** fcvtzu {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svuint32x2_t, svfloat32x2_t, z0,
+ svcvt_u32_f32_x2 (z4),
+ svcvt_u32 (z4))
+
+/*
+** cvt_z4_z0:
+** fcvtzu {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svfloat32x2_t, svuint32x2_t, z4,
+ svcvt_u32_f32_x2 (z0),
+ svcvt_u32 (z0))
+
+/*
+** cvt_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzu {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z18_z23, svuint32x2_t, svfloat32x2_t, z18,
+ svcvt_u32_f32_x2 (z23),
+ svcvt_u32 (z23))
+
+/*
+** cvt_z23_z28:
+** fcvtzu [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svfloat32x2_t, svuint32x2_t, z23,
+ svcvt_u32_f32_x2 (z28),
+ svcvt_u32 (z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z4:
+** fcvtzu {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z0_z4, svuint32x4_t, svfloat32x4_t, z0,
+ svcvt_u32_f32_x4 (z4),
+ svcvt_u32 (z4))
+
+/*
+** cvt_z4_z0:
+** fcvtzu {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z0, svfloat32x4_t, svuint32x4_t, z4,
+ svcvt_u32_f32_x4 (z0),
+ svcvt_u32 (z0))
+
+/*
+** cvt_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzu {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z4_z18, svfloat32x4_t, svuint32x4_t, z4,
+ svcvt_u32_f32_x4 (z18),
+ svcvt_u32 (z18))
+
+/*
+** cvt_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzu {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z28_z23, svuint32x4_t, svfloat32x4_t, z28,
+ svcvt_u32_f32_x4 (z23),
+ svcvt_u32 (z23))
+
+/*
+** cvt_z23_z28:
+** fcvtzu [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z28, svfloat32x4_t, svuint32x4_t, z23,
+ svcvt_u32_f32_x4 (z28),
+ svcvt_u32 (z28))
+
+/*
+** cvt_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtzu {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (cvt_z23_z18, svfloat32x4_t, svuint32x4_t, z23,
+ svcvt_u32_f32_x4 (z18),
+ svcvt_u32 (z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvtn_z0_z0:
+** bfcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z0, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvtn_bf16_f32_x2 (z0),
+ z0_res = svcvtn_bf16 (z0))
+
+/*
+** cvtn_z0_z6:
+** bfcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z6, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvtn_bf16_f32_x2 (z6),
+ z0_res = svcvtn_bf16 (z6))
+
+/*
+** cvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** bfcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z29, svfloat32x2_t, svbfloat16_t,
+ z0_res = svcvtn_bf16_f32_x2 (z29),
+ z0_res = svcvtn_bf16 (z29))
+
+/*
+** cvtn_z5_z0:
+** bfcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z5_z0, svfloat32x2_t, svbfloat16_t,
+ z5 = svcvtn_bf16_f32_x2 (z0),
+ z5 = svcvtn_bf16 (z0))
+
+/*
+** cvtn_z22_z16:
+** bfcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z22_z16, svfloat32x2_t, svbfloat16_t,
+ z22 = svcvtn_bf16_f32_x2 (z16),
+ z22 = svcvtn_bf16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** cvtn_z0_z0:
+** fcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z0, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvtn_f16_f32_x2 (z0),
+ z0_res = svcvtn_f16 (z0))
+
+/*
+** cvtn_z0_z6:
+** fcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z6, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvtn_f16_f32_x2 (z6),
+ z0_res = svcvtn_f16 (z6))
+
+/*
+** cvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** fcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvtn_z0_z29, svfloat32x2_t, svfloat16_t,
+ z0_res = svcvtn_f16_f32_x2 (z29),
+ z0_res = svcvtn_f16 (z29))
+
+/*
+** cvtn_z5_z0:
+** fcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z5_z0, svfloat32x2_t, svfloat16_t,
+ z5 = svcvtn_f16_f32_x2 (z0),
+ z5 = svcvtn_f16 (z0))
+
+/*
+** cvtn_z22_z16:
+** fcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (cvtn_z22_z16, svfloat32x2_t, svfloat16_t,
+ z22 = svcvtn_f16_f32_x2 (z16),
+ z22 = svcvtn_f16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_f32:
+** fdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_f32:
+** fdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_f32:
+** fdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svfloat16_t, z8,
+ z0 = svdot_lane_f32_f16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_f32, svfloat32_t, svfloat16_t, z16,
+ z0 = svdot_lane_f32_f16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_s32:
+** sdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_s32:
+** sdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_s32:
+** sdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_s32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint16_t, z8,
+ z0 = svdot_lane_s32_s16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_s32:
+** mov (z[0-7])\.d, z16\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint16_t, z16,
+ z0 = svdot_lane_s32_s16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_u32:
+** udot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_u32:
+** udot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_u32:
+** udot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_u32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint16_t, z8,
+ z0 = svdot_lane_u32_u16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_u32:
+** mov (z[0-7])\.d, z16\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint16_t, z16,
+ z0 = svdot_lane_u32_u16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** bfdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** bfdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** bfdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** bfdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** bfdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** bfdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t,
+ svdot_lane_za32_bf16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** fdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** fdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** fdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t,
+ svdot_lane_za32_f16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** sdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** sdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x2_t, svint16_t,
+ svdot_lane_za32_s16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** sdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x4_t, svint16_t,
+ svdot_lane_za32_s16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** sdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** sdot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint8x2_t, svint8_t,
+ svdot_lane_za32_s8_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** sdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint8x4_t, svint8_t,
+ svdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** udot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** udot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** udot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za32_u16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** udot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** udot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint8x2_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_2:
+** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2),
+ svdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** dot_lane_w8p7_z0_z4_3:
+** udot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3),
+ svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2),
+ svdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** dot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3),
+ svdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint8x4_t, svuint8_t,
+ svdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za64_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za64_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_0:
+** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8, z28, z4, 0),
+ svdot_lane_za64_vg1x2 (w8, z28, z4, 0))
+
+/*
+** dot_lane_w8p7_z0_z4_1:
+** sdot za\.d\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8 + 7, z0, z4, 1),
+ svdot_lane_za64_vg1x2 (w8 + 7, z0, z4, 1))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za64_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za64_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** sdot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8, z4, z15, 0),
+ svdot_lane_za64_vg1x2 (w8, z4, z15, 0))
+
+/*
+** dot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8, z28, z16, 1),
+ svdot_lane_za64_vg1x2 (w8, z28, z16, 1))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.d\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za64_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** sdot za\.d\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x2_t, svint16_t,
+ svdot_lane_za64_s16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za64_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_0:
+** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8, z28, z4, 0),
+ svdot_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** dot_lane_w8p7_z0_z4_1:
+** sdot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8 + 7, z0, z4, 1),
+ svdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** sdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8, z4, z15, 0),
+ svdot_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** dot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8, z28, z16, 1),
+ svdot_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x4_t, svint16_t,
+ svdot_lane_za64_s16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (0, z0, z4, 0),
+ svdot_lane_za64_vg1x2 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w0, z0, z7, 1),
+ svdot_lane_za64_vg1x2 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_0:
+** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8, z28, z4, 0),
+ svdot_lane_za64_vg1x2 (w8, z28, z4, 0))
+
+/*
+** dot_lane_w8p7_z0_z4_1:
+** udot za\.d\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8 + 7, z0, z4, 1),
+ svdot_lane_za64_vg1x2 (w8 + 7, z0, z4, 1))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8 + 8, z0, z4, 0),
+ svdot_lane_za64_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w0 - 1, z0, z4, 1),
+ svdot_lane_za64_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** udot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8, z4, z15, 0),
+ svdot_lane_za64_vg1x2 (w8, z4, z15, 0))
+
+/*
+** dot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8, z28, z16, 1),
+ svdot_lane_za64_vg1x2 (w8, z28, z16, 1))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.d\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8, z17, z7, 0),
+ svdot_lane_za64_vg1x2 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** udot za\.d\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x2 (w8, z22, z4, 1),
+ svdot_lane_za64_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (0, z0, z4, 0),
+ svdot_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** dot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w0, z0, z7, 1),
+ svdot_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** dot_lane_w8_z28_z4_0:
+** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8, z28, z4, 0),
+ svdot_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** dot_lane_w8p7_z0_z4_1:
+** udot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8 + 7, z0, z4, 1),
+ svdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** dot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8 + 8, z0, z4, 0),
+ svdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** dot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w0 - 1, z0, z4, 1),
+ svdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** dot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** udot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8, z4, z15, 0),
+ svdot_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** dot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8, z28, z16, 1),
+ svdot_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** dot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8, z17, z7, 0),
+ svdot_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** dot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** udot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+ svdot_lane_za64_u16_vg1x4 (w8, z22, z4, 1),
+ svdot_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** bfdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** bfdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** bfdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** bfdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** bfdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** bfdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** bfdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svbfloat16x2_t,
+ svdot_za32_bf16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** bfdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** bfdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** bfdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** bfdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** bfdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** bfdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** bfdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** bfdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** bfdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** bfdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svbfloat16x4_t,
+ svdot_za32_bf16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** bfdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** bfdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** bfdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** bfdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** bfdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t,
+ svdot_single_za32_bf16_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** fdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** fdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** fdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** fdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** fdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** fdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** fdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svfloat16x2_t,
+ svdot_za32_f16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** fdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** fdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** fdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** fdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** fdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** fdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** fdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** fdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** fdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** fdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** fdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** fdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svfloat16x4_t,
+ svdot_za32_f16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** fdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** fdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** fdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** fdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** fdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+ svdot_single_za32_f16_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** sdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** sdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint16x2_t,
+ svdot_za32_s16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** sdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** sdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svdot_single_za32_s16_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** sdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint16x4_t,
+ svdot_za32_s16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** sdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** sdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svdot_single_za32_s16_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** sdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** sdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint8x2_t,
+ svdot_za32_s8_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** sdot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** sdot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x2_t, svint8_t,
+ svdot_single_za32_s8_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** sdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint8x4_t,
+ svdot_za32_s8_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** sdot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** sdot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x4_t, svint8_t,
+ svdot_single_za32_s8_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** udot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** udot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x2_t,
+ svdot_za32_u16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** udot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** udot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svdot_single_za32_u16_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** udot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x4_t,
+ svdot_za32_u16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** udot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** udot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svdot_single_za32_u16_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (0, z0, z0),
+ svdot_za32_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w0, z0, z0),
+ svdot_za32_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z0, z4),
+ svdot_za32_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** udot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z4, z18),
+ svdot_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z0, z23),
+ svdot_za32_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z23, z0),
+ svdot_za32_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** udot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z18, z28),
+ svdot_za32_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8, z28, z4),
+ svdot_za32_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8 + 1, z4, z0),
+ svdot_za32_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8 + 2, z4, z0),
+ svdot_za32_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w11 + 4, z4, z0),
+ svdot_za32_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8 + 7, z4, z0),
+ svdot_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8 + 8, z4, z4),
+ svdot_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x2_t,
+ svdot_za32_u8_vg1x2 (w8 - 1, z4, z0),
+ svdot_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (0, z1, z0),
+ svdot_za32_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w0, z1, z0),
+ svdot_za32_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8, z1, z0),
+ svdot_za32_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8 + 1, z1, z0),
+ svdot_za32_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** udot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8 + 2, z20, z0),
+ svdot_za32_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** udot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w11 + 4, z27, z0),
+ svdot_za32_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8 + 7, z1, z0),
+ svdot_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8 + 8, z1, z0),
+ svdot_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w0 - 1, z1, z0),
+ svdot_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8, z0, z15),
+ svdot_za32_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x2_t, svuint8_t,
+ svdot_single_za32_u8_vg1x2 (w8, z20, z16),
+ svdot_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (0, z0, z0),
+ svdot_za32_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w0, z0, z0),
+ svdot_za32_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z0, z4),
+ svdot_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z0, z18),
+ svdot_za32_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z18, z0),
+ svdot_za32_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z0, z23),
+ svdot_za32_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z23, z0),
+ svdot_za32_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** udot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z4, z28),
+ svdot_za32_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8, z28, z0),
+ svdot_za32_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8 + 1, z4, z0),
+ svdot_za32_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8 + 2, z4, z0),
+ svdot_za32_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w11 + 4, z4, z0),
+ svdot_za32_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8 + 7, z4, z0),
+ svdot_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8 + 8, z4, z4),
+ svdot_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x4_t,
+ svdot_za32_u8_vg1x4 (w8 - 1, z4, z0),
+ svdot_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (0, z1, z0),
+ svdot_za32_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w0, z1, z0),
+ svdot_za32_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8, z1, z0),
+ svdot_za32_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8 + 1, z1, z0),
+ svdot_za32_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** udot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8 + 4, z20, z0),
+ svdot_za32_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** udot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8 + 6, z27, z0),
+ svdot_za32_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8 + 7, z1, z0),
+ svdot_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8 + 8, z1, z0),
+ svdot_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w0 - 1, z1, z0),
+ svdot_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8, z0, z15),
+ svdot_za32_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x4_t, svuint8_t,
+ svdot_single_za32_u8_vg1x4 (w8, z20, z16),
+ svdot_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (0, z0, z0),
+ svdot_za64_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w0, z0, z0),
+ svdot_za64_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z0, z4),
+ svdot_za64_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** sdot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z4, z18),
+ svdot_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z0, z23),
+ svdot_za64_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z23, z0),
+ svdot_za64_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** sdot za\.d\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z18, z28),
+ svdot_za64_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8, z28, z4),
+ svdot_za64_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.d\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za64_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.d\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za64_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.d\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za64_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.d\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint16x2_t,
+ svdot_za64_s16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (0, z1, z0),
+ svdot_za64_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w0, z1, z0),
+ svdot_za64_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.d\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8, z1, z0),
+ svdot_za64_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.d\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za64_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** sdot za\.d\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za64_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** sdot za\.d\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za64_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.d\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8, z0, z15),
+ svdot_za64_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.d\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svdot_single_za64_s16_vg1x2 (w8, z20, z16),
+ svdot_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (0, z0, z0),
+ svdot_za64_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w0, z0, z0),
+ svdot_za64_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z0, z4),
+ svdot_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z0, z18),
+ svdot_za64_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** sdot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z18, z0),
+ svdot_za64_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z0, z23),
+ svdot_za64_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** sdot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z23, z0),
+ svdot_za64_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** sdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z4, z28),
+ svdot_za64_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8, z28, z0),
+ svdot_za64_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** sdot za\.d\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za64_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** sdot za\.d\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za64_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** sdot za\.d\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za64_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** sdot za\.d\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sdot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint16x4_t,
+ svdot_za64_s16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (0, z1, z0),
+ svdot_za64_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w0, z1, z0),
+ svdot_za64_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** sdot za\.d\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8, z1, z0),
+ svdot_za64_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sdot za\.d\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za64_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** sdot za\.d\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za64_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** sdot za\.d\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za64_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sdot za\.d\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8, z0, z15),
+ svdot_za64_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sdot za\.d\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svdot_single_za64_s16_vg1x4 (w8, z20, z16),
+ svdot_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (0, z0, z0),
+ svdot_za64_vg1x2 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w0, z0, z0),
+ svdot_za64_vg1x2 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z0, z4),
+ svdot_za64_vg1x2 (w8, z0, z4))
+
+/*
+** dot_w8_z4_z18:
+** udot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z4, z18),
+ svdot_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z0, z23),
+ svdot_za64_vg1x2 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z23, z0),
+ svdot_za64_vg1x2 (w8, z23, z0))
+
+/*
+** dot_w8_z18_z28:
+** udot za\.d\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z18, z28),
+ svdot_za64_vg1x2 (w8, z18, z28))
+
+/*
+** dot_w8_z28_z4:
+** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8, z28, z4),
+ svdot_za64_vg1x2 (w8, z28, z4))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.d\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8 + 1, z4, z0),
+ svdot_za64_vg1x2 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.d\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8 + 2, z4, z0),
+ svdot_za64_vg1x2 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.d\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w11 + 4, z4, z0),
+ svdot_za64_vg1x2 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.d\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8 + 7, z4, z0),
+ svdot_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8 + 8, z4, z4),
+ svdot_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x2_t,
+ svdot_za64_u16_vg1x2 (w8 - 1, z4, z0),
+ svdot_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (0, z1, z0),
+ svdot_za64_vg1x2 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w0, z1, z0),
+ svdot_za64_vg1x2 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.d\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8, z1, z0),
+ svdot_za64_vg1x2 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.d\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8 + 1, z1, z0),
+ svdot_za64_vg1x2 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p2_z20_z0:
+** udot za\.d\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8 + 2, z20, z0),
+ svdot_za64_vg1x2 (w8 + 2, z20, z0))
+
+/*
+** dot_single_w11p4_z27_z0:
+** udot za\.d\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w11 + 4, z27, z0),
+ svdot_za64_vg1x2 (w11 + 4, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.d\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8 + 7, z1, z0),
+ svdot_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8 + 8, z1, z0),
+ svdot_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w0 - 1, z1, z0),
+ svdot_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8, z0, z15),
+ svdot_za64_vg1x2 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.d\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svdot_single_za64_u16_vg1x2 (w8, z20, z16),
+ svdot_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (0, z0, z0),
+ svdot_za64_vg1x4 (0, z0, z0))
+
+/*
+** dot_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w0, z0, z0),
+ svdot_za64_vg1x4 (w0, z0, z0))
+
+/*
+** dot_w8_z0_z4:
+** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z4, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z0, z4),
+ svdot_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z0, z18),
+ svdot_za64_vg1x4 (w8, z0, z18))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** udot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z18, z0),
+ svdot_za64_vg1x4 (w8, z18, z0))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z0, z23),
+ svdot_za64_vg1x4 (w8, z0, z23))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** udot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z23, z0),
+ svdot_za64_vg1x4 (w8, z23, z0))
+
+/*
+** dot_w8_z4_z28:
+** udot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z4, z28),
+ svdot_za64_vg1x4 (w8, z4, z28))
+
+/*
+** dot_w8_z28_z0:
+** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8, z28, z0),
+ svdot_za64_vg1x4 (w8, z28, z0))
+
+/*
+** dot_w8p1_z4_z0:
+** udot za\.d\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8 + 1, z4, z0),
+ svdot_za64_vg1x4 (w8 + 1, z4, z0))
+
+/*
+** dot_w8p2_z4_z0:
+** udot za\.d\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8 + 2, z4, z0),
+ svdot_za64_vg1x4 (w8 + 2, z4, z0))
+
+/*
+** dot_w11p4_z4_z0:
+** udot za\.d\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w11 + 4, z4, z0),
+ svdot_za64_vg1x4 (w11 + 4, z4, z0))
+
+/*
+** dot_w8p7_z4_z0:
+** udot za\.d\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8 + 7, z4, z0),
+ svdot_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** dot_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8 + 8, z4, z4),
+ svdot_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** udot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x4_t,
+ svdot_za64_u16_vg1x4 (w8 - 1, z4, z0),
+ svdot_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (0, z1, z0),
+ svdot_za64_vg1x4 (0, z1, z0))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w0, z1, z0),
+ svdot_za64_vg1x4 (w0, z1, z0))
+
+/*
+** dot_single_w8_z1_z0:
+** udot za\.d\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8, z1, z0),
+ svdot_za64_vg1x4 (w8, z1, z0))
+
+/*
+** dot_single_w8p1_z1_z0:
+** udot za\.d\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8 + 1, z1, z0),
+ svdot_za64_vg1x4 (w8 + 1, z1, z0))
+
+/*
+** dot_single_w8p4_z20_z0:
+** udot za\.d\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8 + 4, z20, z0),
+ svdot_za64_vg1x4 (w8 + 4, z20, z0))
+
+/*
+** dot_single_w8p6_z27_z0:
+** udot za\.d\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8 + 6, z27, z0),
+ svdot_za64_vg1x4 (w8 + 6, z27, z0))
+
+/*
+** dot_single_w8p7_z1_z0:
+** udot za\.d\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8 + 7, z1, z0),
+ svdot_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8 + 8, z1, z0),
+ svdot_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w0 - 1, z1, z0),
+ svdot_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8, z0, z15),
+ svdot_za64_vg1x4 (w8, z0, z15))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** udot za\.d\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svdot_single_za64_u16_vg1x4 (w8, z20, z16),
+ svdot_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svld1_bf16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svld1_bf16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svld1_bf16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svld1_bf16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svld1_bf16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svld1_bf16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svld1_f16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svld1_f16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svld1_f16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svld1_f16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svld1_f16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svld1_f16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svld1_f32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svld1_f32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svld1_f32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svld1_f32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svld1_f32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svld1_f32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svld1_f64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svld1_f64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svld1_f64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svld1_f64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svld1_f64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svld1_f64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x2_t, int16_t,
+ z17 = svld1_s16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x2_t, int16_t,
+ z22 = svld1_s16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x2_t, int16_t,
+ z28 = svld1_s16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x4_t, int16_t,
+ z17 = svld1_s16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x4_t, int16_t,
+ z22 = svld1_s16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x4_t, int16_t,
+ z28 = svld1_s16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x2_t, int32_t,
+ z17 = svld1_s32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x2_t, int32_t,
+ z22 = svld1_s32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x2_t, int32_t,
+ z28 = svld1_s32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x4_t, int32_t,
+ z17 = svld1_s32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x4_t, int32_t,
+ z22 = svld1_s32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x4_t, int32_t,
+ z28 = svld1_s32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x2_t, int64_t,
+ z17 = svld1_s64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x2_t, int64_t,
+ z22 = svld1_s64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x2_t, int64_t,
+ z28 = svld1_s64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x4_t, int64_t,
+ z17 = svld1_s64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x4_t, int64_t,
+ z22 = svld1_s64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x4_t, int64_t,
+ z28 = svld1_s64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x2_t, int8_t,
+ z17 = svld1_s8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x2_t, int8_t,
+ z22 = svld1_s8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x2_t, int8_t,
+ z28 = svld1_s8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x4_t, int8_t,
+ z17 = svld1_s8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x4_t, int8_t,
+ z22 = svld1_s8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x4_t, int8_t,
+ z28 = svld1_s8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
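+/* A run-time vnum needs the vector count in a register: the expected
+   code either folds the scaling and the base add into a single MADD
+   and uses base-only addressing, or keeps a MUL and uses the register
+   offset form.  Both alternatives are accepted below.  */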
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svld1_u16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svld1_u16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svld1_u16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svld1_u16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svld1_u16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svld1_u16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svld1_u32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svld1_u32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svld1_u32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svld1_u32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svld1_u32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svld1_u32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svld1_u64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svld1_u64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svld1_u64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svld1_u64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svld1_u64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svld1_u64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svld1_u8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svld1_u8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svld1_u8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svld1_u8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svld1_u8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svld1_u8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svldnt1_bf16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svldnt1_bf16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svldnt1_bf16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svldnt1_bf16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svldnt1_bf16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svldnt1_bf16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svldnt1_f16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svldnt1_f16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svldnt1_f16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svldnt1_f16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svldnt1_f16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svldnt1_f16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svldnt1_f32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svldnt1_f32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svldnt1_f32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svldnt1_f32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svldnt1_f32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svldnt1_f32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svldnt1_f64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svldnt1_f64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svldnt1_f64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
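+/* Note on the pn0 and pn7 tests below: the expected "mov" shows that the
+   counter predicate operand of these loads is only matched in the
+   p8-p15 range (the regex accepts any register in that range), whereas
+   pn15 can be used directly without a copy.  */
+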
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svldnt1_f64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svldnt1_f64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svldnt1_f64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x2_t, int16_t,
+ z17 = svldnt1_s16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x2_t, int16_t,
+ z22 = svldnt1_s16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x2_t, int16_t,
+ z28 = svldnt1_s16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
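+/* Illustrative note: for the four-vector form the vnum argument counts
+   whole vectors, so (with a counter predicate "pn" and an int16_t
+   pointer "ptr", both assumed names)
+
+     svint16x4_t v = svldnt1_vnum_s16_x4 (pn, ptr, 4);
+
+   is expected to match svldnt1_s16_x4 (pn, ptr + svcnth () * 4), which
+   is why the _4 and vnum_4 tests below check the same "#4, mul vl"
+   addressing form.  */
+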
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x4_t, int16_t,
+ z17 = svldnt1_s16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x4_t, int16_t,
+ z22 = svldnt1_s16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x4_t, int16_t,
+ z28 = svldnt1_s16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x2_t, int32_t,
+ z17 = svldnt1_s32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x2_t, int32_t,
+ z22 = svldnt1_s32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x2_t, int32_t,
+ z28 = svldnt1_s32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x4_t, int32_t,
+ z17 = svldnt1_s32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x4_t, int32_t,
+ z22 = svldnt1_s32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x4_t, int32_t,
+ z28 = svldnt1_s32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x2_t, int64_t,
+ z17 = svldnt1_s64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x2_t, int64_t,
+ z22 = svldnt1_s64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x2_t, int64_t,
+ z28 = svldnt1_s64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x4_t, int64_t,
+ z17 = svldnt1_s64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x4_t, int64_t,
+ z22 = svldnt1_s64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x4_t, int64_t,
+ z28 = svldnt1_s64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x2_t, int8_t,
+ z17 = svldnt1_s8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x2_t, int8_t,
+ z22 = svldnt1_s8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x2_t, int8_t,
+ z28 = svldnt1_s8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x4_t, int8_t,
+ z17 = svldnt1_s8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x4_t, int8_t,
+ z22 = svldnt1_s8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x4_t, int8_t,
+ z28 = svldnt1_s8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svldnt1_u16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svldnt1_u16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svldnt1_u16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svldnt1_u16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svldnt1_u16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svldnt1_u16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svldnt1_u32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svldnt1_u32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svldnt1_u32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svldnt1_u32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svldnt1_u32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svldnt1_u32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svldnt1_u64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svldnt1_u64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svldnt1_u64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svldnt1_u64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svldnt1_u64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svldnt1_u64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svldnt1_u8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svldnt1_u8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svldnt1_u8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svldnt1_u8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svldnt1_u8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svldnt1_u8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define SHARED_ZT0
+#include "test_sme2_acle.h"
+
+/*
+** ldr_zt0_x0:
+** ldr zt0, \[x0\]
+** ret
+*/
+PROTO (ldr_zt0_x0, void, (char *x0)) { svldr_zt (0, x0); }
+
+/*
+** ldr_zt0_x0p1:
+** add (x[0-9]+), x0, #?1
+** ldr zt0, \[\1\]
+** ret
+*/
+PROTO (ldr_zt0_x0p1, void, (char *x0)) { svldr_zt (0, x0 + 1); }
+
+/*
+** ldr_zt0_x0p64:
+** add (x[0-9]+), x0, #?64
+** ldr zt0, \[\1\]
+** ret
+*/
+PROTO (ldr_zt0_x0p64, void, (char *x0)) { svldr_zt (0, x0 + 64); }
+
+/*
+** ldr_zt0_x0_vl1:
+** incb x0
+** ldr zt0, \[x0\]
+** ret
+*/
+PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntb ()); }
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16_t, svuint8_t, z1,
+ svluti2_lane_zt_bf16 (0, z0, 0),
+ svluti2_lane_zt_bf16 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.h, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svbfloat16_t, svuint8_t, z18,
+ svluti2_lane_zt_bf16 (0, z5, 15),
+ svluti2_lane_zt_bf16 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.h, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svbfloat16_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16 (0, z7, 13),
+ svluti2_lane_zt_bf16 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.h, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svbfloat16_t, svuint8_t, z28,
+ svluti2_lane_zt_bf16 (0, z16, 11),
+ svluti2_lane_zt_bf16 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svbfloat16_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16 (0, z23, 1),
+ svluti2_lane_zt_bf16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16x2_t, svuint8_t, z1,
+ svluti2_lane_zt_bf16_x2 (0, z0, 0),
+ svluti2_lane_zt_bf16_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.h - z19\.h}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svbfloat16x2_t, svuint8_t, z18,
+ svluti2_lane_zt_bf16_x2 (0, z5, 7),
+ svluti2_lane_zt_bf16_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.h - z25\.h}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svbfloat16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16_x2 (0, z7, 6),
+ svluti2_lane_zt_bf16_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.h - z29\.h}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svbfloat16x2_t, svuint8_t, z28,
+ svluti2_lane_zt_bf16_x2 (0, z16, 3),
+ svluti2_lane_zt_bf16_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.h - z25\.h}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svbfloat16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16_x2 (0, z23, 1),
+ svluti2_lane_zt_bf16_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16x4_t, svuint8_t, z1,
+ svluti2_lane_zt_bf16_x4 (0, z0, 0),
+ svluti2_lane_zt_bf16_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svbfloat16x4_t, svuint8_t, z18,
+ svluti2_lane_zt_bf16_x4 (0, z5, 3),
+ svluti2_lane_zt_bf16_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.h - z27\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svbfloat16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16_x4 (0, z7, 2),
+ svluti2_lane_zt_bf16_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svbfloat16x4_t, svuint8_t, z28,
+ svluti2_lane_zt_bf16_x4 (0, z16, 1),
+ svluti2_lane_zt_bf16_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svbfloat16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_bf16_x4 (0, z23, 0),
+ svluti2_lane_zt_bf16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16_t, svuint8_t, z1,
+ svluti2_lane_zt_f16 (0, z0, 0),
+ svluti2_lane_zt_f16 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.h, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svfloat16_t, svuint8_t, z18,
+ svluti2_lane_zt_f16 (0, z5, 15),
+ svluti2_lane_zt_f16 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.h, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svfloat16_t, svuint8_t, z24,
+ svluti2_lane_zt_f16 (0, z7, 13),
+ svluti2_lane_zt_f16 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.h, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svfloat16_t, svuint8_t, z28,
+ svluti2_lane_zt_f16 (0, z16, 11),
+ svluti2_lane_zt_f16 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svfloat16_t, svuint8_t, z24,
+ svluti2_lane_zt_f16 (0, z23, 1),
+ svluti2_lane_zt_f16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16x2_t, svuint8_t, z1,
+ svluti2_lane_zt_f16_x2 (0, z0, 0),
+ svluti2_lane_zt_f16_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.h - z19\.h}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svfloat16x2_t, svuint8_t, z18,
+ svluti2_lane_zt_f16_x2 (0, z5, 7),
+ svluti2_lane_zt_f16_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.h - z25\.h}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svfloat16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_f16_x2 (0, z7, 6),
+ svluti2_lane_zt_f16_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.h - z29\.h}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svfloat16x2_t, svuint8_t, z28,
+ svluti2_lane_zt_f16_x2 (0, z16, 3),
+ svluti2_lane_zt_f16_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.h - z25\.h}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svfloat16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_f16_x2 (0, z23, 1),
+ svluti2_lane_zt_f16_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16x4_t, svuint8_t, z1,
+ svluti2_lane_zt_f16_x4 (0, z0, 0),
+ svluti2_lane_zt_f16_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svfloat16x4_t, svuint8_t, z18,
+ svluti2_lane_zt_f16_x4 (0, z5, 3),
+ svluti2_lane_zt_f16_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.h - z27\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svfloat16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_f16_x4 (0, z7, 2),
+ svluti2_lane_zt_f16_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svfloat16x4_t, svuint8_t, z28,
+ svluti2_lane_zt_f16_x4 (0, z16, 1),
+ svluti2_lane_zt_f16_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svfloat16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_f16_x4 (0, z23, 0),
+ svluti2_lane_zt_f16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32_t, svuint8_t, z1,
+ svluti2_lane_zt_f32 (0, z0, 0),
+ svluti2_lane_zt_f32 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.s, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svfloat32_t, svuint8_t, z18,
+ svluti2_lane_zt_f32 (0, z5, 15),
+ svluti2_lane_zt_f32 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.s, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svfloat32_t, svuint8_t, z24,
+ svluti2_lane_zt_f32 (0, z7, 13),
+ svluti2_lane_zt_f32 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.s, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svfloat32_t, svuint8_t, z28,
+ svluti2_lane_zt_f32 (0, z16, 11),
+ svluti2_lane_zt_f32 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svfloat32_t, svuint8_t, z24,
+ svluti2_lane_zt_f32 (0, z23, 1),
+ svluti2_lane_zt_f32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32x2_t, svuint8_t, z1,
+ svluti2_lane_zt_f32_x2 (0, z0, 0),
+ svluti2_lane_zt_f32_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.s - z19\.s}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svfloat32x2_t, svuint8_t, z18,
+ svluti2_lane_zt_f32_x2 (0, z5, 7),
+ svluti2_lane_zt_f32_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.s - z25\.s}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svfloat32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_f32_x2 (0, z7, 6),
+ svluti2_lane_zt_f32_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.s - z29\.s}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svfloat32x2_t, svuint8_t, z28,
+ svluti2_lane_zt_f32_x2 (0, z16, 3),
+ svluti2_lane_zt_f32_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.s - z25\.s}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svfloat32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_f32_x2 (0, z23, 1),
+ svluti2_lane_zt_f32_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32x4_t, svuint8_t, z1,
+ svluti2_lane_zt_f32_x4 (0, z0, 0),
+ svluti2_lane_zt_f32_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svfloat32x4_t, svuint8_t, z18,
+ svluti2_lane_zt_f32_x4 (0, z5, 3),
+ svluti2_lane_zt_f32_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.s - z27\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svfloat32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_f32_x4 (0, z7, 2),
+ svluti2_lane_zt_f32_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svfloat32x4_t, svuint8_t, z28,
+ svluti2_lane_zt_f32_x4 (0, z16, 1),
+ svluti2_lane_zt_f32_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svfloat32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_f32_x4 (0, z23, 0),
+ svluti2_lane_zt_f32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint16_t, svuint8_t, z1,
+ svluti2_lane_zt_s16 (0, z0, 0),
+ svluti2_lane_zt_s16 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.h, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svint16_t, svuint8_t, z18,
+ svluti2_lane_zt_s16 (0, z5, 15),
+ svluti2_lane_zt_s16 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.h, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svint16_t, svuint8_t, z24,
+ svluti2_lane_zt_s16 (0, z7, 13),
+ svluti2_lane_zt_s16 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.h, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svint16_t, svuint8_t, z28,
+ svluti2_lane_zt_s16 (0, z16, 11),
+ svluti2_lane_zt_s16 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint16_t, svuint8_t, z24,
+ svluti2_lane_zt_s16 (0, z23, 1),
+ svluti2_lane_zt_s16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint16x2_t, svuint8_t, z1,
+ svluti2_lane_zt_s16_x2 (0, z0, 0),
+ svluti2_lane_zt_s16_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.h - z19\.h}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svint16x2_t, svuint8_t, z18,
+ svluti2_lane_zt_s16_x2 (0, z5, 7),
+ svluti2_lane_zt_s16_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.h - z25\.h}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svint16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s16_x2 (0, z7, 6),
+ svluti2_lane_zt_s16_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.h - z29\.h}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svint16x2_t, svuint8_t, z28,
+ svluti2_lane_zt_s16_x2 (0, z16, 3),
+ svluti2_lane_zt_s16_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.h - z25\.h}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s16_x2 (0, z23, 1),
+ svluti2_lane_zt_s16_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint16x4_t, svuint8_t, z1,
+ svluti2_lane_zt_s16_x4 (0, z0, 0),
+ svluti2_lane_zt_s16_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svint16x4_t, svuint8_t, z18,
+ svluti2_lane_zt_s16_x4 (0, z5, 3),
+ svluti2_lane_zt_s16_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.h - z27\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svint16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s16_x4 (0, z7, 2),
+ svluti2_lane_zt_s16_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svint16x4_t, svuint8_t, z28,
+ svluti2_lane_zt_s16_x4 (0, z16, 1),
+ svluti2_lane_zt_s16_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svint16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s16_x4 (0, z23, 0),
+ svluti2_lane_zt_s16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint32_t, svuint8_t, z1,
+ svluti2_lane_zt_s32 (0, z0, 0),
+ svluti2_lane_zt_s32 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.s, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svint32_t, svuint8_t, z18,
+ svluti2_lane_zt_s32 (0, z5, 15),
+ svluti2_lane_zt_s32 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.s, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svint32_t, svuint8_t, z24,
+ svluti2_lane_zt_s32 (0, z7, 13),
+ svluti2_lane_zt_s32 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.s, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svint32_t, svuint8_t, z28,
+ svluti2_lane_zt_s32 (0, z16, 11),
+ svluti2_lane_zt_s32 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint32_t, svuint8_t, z24,
+ svluti2_lane_zt_s32 (0, z23, 1),
+ svluti2_lane_zt_s32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint32x2_t, svuint8_t, z1,
+ svluti2_lane_zt_s32_x2 (0, z0, 0),
+ svluti2_lane_zt_s32_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.s - z19\.s}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svint32x2_t, svuint8_t, z18,
+ svluti2_lane_zt_s32_x2 (0, z5, 7),
+ svluti2_lane_zt_s32_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.s - z25\.s}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svint32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s32_x2 (0, z7, 6),
+ svluti2_lane_zt_s32_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.s - z29\.s}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svint32x2_t, svuint8_t, z28,
+ svluti2_lane_zt_s32_x2 (0, z16, 3),
+ svluti2_lane_zt_s32_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.s - z25\.s}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s32_x2 (0, z23, 1),
+ svluti2_lane_zt_s32_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint32x4_t, svuint8_t, z1,
+ svluti2_lane_zt_s32_x4 (0, z0, 0),
+ svluti2_lane_zt_s32_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svint32x4_t, svuint8_t, z18,
+ svluti2_lane_zt_s32_x4 (0, z5, 3),
+ svluti2_lane_zt_s32_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.s - z27\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svint32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s32_x4 (0, z7, 2),
+ svluti2_lane_zt_s32_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svint32x4_t, svuint8_t, z28,
+ svluti2_lane_zt_s32_x4 (0, z16, 1),
+ svluti2_lane_zt_s32_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svint32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s32_x4 (0, z23, 0),
+ svluti2_lane_zt_s32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.b, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint8_t, svuint8_t, z1,
+ svluti2_lane_zt_s8 (0, z0, 0),
+ svluti2_lane_zt_s8 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.b, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svint8_t, svuint8_t, z18,
+ svluti2_lane_zt_s8 (0, z5, 15),
+ svluti2_lane_zt_s8 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.b, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svint8_t, svuint8_t, z24,
+ svluti2_lane_zt_s8 (0, z7, 13),
+ svluti2_lane_zt_s8 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.b, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svint8_t, svuint8_t, z28,
+ svluti2_lane_zt_s8 (0, z16, 11),
+ svluti2_lane_zt_s8 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.b, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint8_t, svuint8_t, z24,
+ svluti2_lane_zt_s8 (0, z23, 1),
+ svluti2_lane_zt_s8 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint8x2_t, svuint8_t, z1,
+ svluti2_lane_zt_s8_x2 (0, z0, 0),
+ svluti2_lane_zt_s8_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.b - z19\.b}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svint8x2_t, svuint8_t, z18,
+ svluti2_lane_zt_s8_x2 (0, z5, 7),
+ svluti2_lane_zt_s8_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.b - z25\.b}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svint8x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s8_x2 (0, z7, 6),
+ svluti2_lane_zt_s8_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.b - z29\.b}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svint8x2_t, svuint8_t, z28,
+ svluti2_lane_zt_s8_x2 (0, z16, 3),
+ svluti2_lane_zt_s8_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.b - z25\.b}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svint8x2_t, svuint8_t, z24,
+ svluti2_lane_zt_s8_x2 (0, z23, 1),
+ svluti2_lane_zt_s8_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svint8x4_t, svuint8_t, z1,
+ svluti2_lane_zt_s8_x4 (0, z0, 0),
+ svluti2_lane_zt_s8_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svint8x4_t, svuint8_t, z18,
+ svluti2_lane_zt_s8_x4 (0, z5, 3),
+ svluti2_lane_zt_s8_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.b - z27\.b}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svint8x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s8_x4 (0, z7, 2),
+ svluti2_lane_zt_s8_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.b - z31\.b}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svint8x4_t, svuint8_t, z28,
+ svluti2_lane_zt_s8_x4 (0, z16, 1),
+ svluti2_lane_zt_s8_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.b - z27\.b}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svint8x4_t, svuint8_t, z24,
+ svluti2_lane_zt_s8_x4 (0, z23, 0),
+ svluti2_lane_zt_s8_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint16_t, svuint8_t, z1,
+ svluti2_lane_zt_u16 (0, z0, 0),
+ svluti2_lane_zt_u16 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.h, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svuint16_t, svuint8_t, z18,
+ svluti2_lane_zt_u16 (0, z5, 15),
+ svluti2_lane_zt_u16 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.h, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svuint16_t, svuint8_t, z24,
+ svluti2_lane_zt_u16 (0, z7, 13),
+ svluti2_lane_zt_u16 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.h, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svuint16_t, svuint8_t, z28,
+ svluti2_lane_zt_u16 (0, z16, 11),
+ svluti2_lane_zt_u16 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint16_t, svuint8_t, z24,
+ svluti2_lane_zt_u16 (0, z23, 1),
+ svluti2_lane_zt_u16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint16x2_t, svuint8_t, z1,
+ svluti2_lane_zt_u16_x2 (0, z0, 0),
+ svluti2_lane_zt_u16_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.h - z19\.h}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svuint16x2_t, svuint8_t, z18,
+ svluti2_lane_zt_u16_x2 (0, z5, 7),
+ svluti2_lane_zt_u16_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.h - z25\.h}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svuint16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u16_x2 (0, z7, 6),
+ svluti2_lane_zt_u16_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.h - z29\.h}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svuint16x2_t, svuint8_t, z28,
+ svluti2_lane_zt_u16_x2 (0, z16, 3),
+ svluti2_lane_zt_u16_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.h - z25\.h}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint16x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u16_x2 (0, z23, 1),
+ svluti2_lane_zt_u16_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint16x4_t, svuint8_t, z1,
+ svluti2_lane_zt_u16_x4 (0, z0, 0),
+ svluti2_lane_zt_u16_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svuint16x4_t, svuint8_t, z18,
+ svluti2_lane_zt_u16_x4 (0, z5, 3),
+ svluti2_lane_zt_u16_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.h - z27\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svuint16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u16_x4 (0, z7, 2),
+ svluti2_lane_zt_u16_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svuint16x4_t, svuint8_t, z28,
+ svluti2_lane_zt_u16_x4 (0, z16, 1),
+ svluti2_lane_zt_u16_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svuint16x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u16_x4 (0, z23, 0),
+ svluti2_lane_zt_u16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint32_t, svuint8_t, z1,
+ svluti2_lane_zt_u32 (0, z0, 0),
+ svluti2_lane_zt_u32 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.s, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svuint32_t, svuint8_t, z18,
+ svluti2_lane_zt_u32 (0, z5, 15),
+ svluti2_lane_zt_u32 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.s, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svuint32_t, svuint8_t, z24,
+ svluti2_lane_zt_u32 (0, z7, 13),
+ svluti2_lane_zt_u32 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.s, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svuint32_t, svuint8_t, z28,
+ svluti2_lane_zt_u32 (0, z16, 11),
+ svluti2_lane_zt_u32 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint32_t, svuint8_t, z24,
+ svluti2_lane_zt_u32 (0, z23, 1),
+ svluti2_lane_zt_u32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint32x2_t, svuint8_t, z1,
+ svluti2_lane_zt_u32_x2 (0, z0, 0),
+ svluti2_lane_zt_u32_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.s - z19\.s}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svuint32x2_t, svuint8_t, z18,
+ svluti2_lane_zt_u32_x2 (0, z5, 7),
+ svluti2_lane_zt_u32_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.s - z25\.s}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svuint32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u32_x2 (0, z7, 6),
+ svluti2_lane_zt_u32_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.s - z29\.s}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svuint32x2_t, svuint8_t, z28,
+ svluti2_lane_zt_u32_x2 (0, z16, 3),
+ svluti2_lane_zt_u32_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.s - z25\.s}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint32x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u32_x2 (0, z23, 1),
+ svluti2_lane_zt_u32_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint32x4_t, svuint8_t, z1,
+ svluti2_lane_zt_u32_x4 (0, z0, 0),
+ svluti2_lane_zt_u32_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svuint32x4_t, svuint8_t, z18,
+ svluti2_lane_zt_u32_x4 (0, z5, 3),
+ svluti2_lane_zt_u32_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.s - z27\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svuint32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u32_x4 (0, z7, 2),
+ svluti2_lane_zt_u32_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svuint32x4_t, svuint8_t, z28,
+ svluti2_lane_zt_u32_x4 (0, z16, 1),
+ svluti2_lane_zt_u32_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svuint32x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u32_x4 (0, z23, 0),
+ svluti2_lane_zt_u32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 z1\.b, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint8_t, svuint8_t, z1,
+ svluti2_lane_zt_u8 (0, z0, 0),
+ svluti2_lane_zt_u8 (0, z0, 0))
+
+/*
+** luti2_z18_z5_15:
+** luti2 z18\.b, zt0, z5\[15\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_15, svuint8_t, svuint8_t, z18,
+ svluti2_lane_zt_u8 (0, z5, 15),
+ svluti2_lane_zt_u8 (0, z5, 15))
+
+/*
+** luti2_z24_z7_13:
+** luti2 z24\.b, zt0, z7\[13\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_13, svuint8_t, svuint8_t, z24,
+ svluti2_lane_zt_u8 (0, z7, 13),
+ svluti2_lane_zt_u8 (0, z7, 13))
+
+/*
+** luti2_z28_z16_11:
+** luti2 z28\.b, zt0, z16\[11\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_11, svuint8_t, svuint8_t, z28,
+ svluti2_lane_zt_u8 (0, z16, 11),
+ svluti2_lane_zt_u8 (0, z16, 11))
+
+/*
+** luti2_z24_z23_1:
+** luti2 z24\.b, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint8_t, svuint8_t, z24,
+ svluti2_lane_zt_u8 (0, z23, 1),
+ svluti2_lane_zt_u8 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint8x2_t, svuint8_t, z1,
+ svluti2_lane_zt_u8_x2 (0, z0, 0),
+ svluti2_lane_zt_u8_x2 (0, z0, 0))
+
+/*
+** luti2_z18_z5_7:
+** luti2 {z18\.b - z19\.b}, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_7, svuint8x2_t, svuint8_t, z18,
+ svluti2_lane_zt_u8_x2 (0, z5, 7),
+ svluti2_lane_zt_u8_x2 (0, z5, 7))
+
+/*
+** luti2_z24_z7_6:
+** luti2 {z24\.b - z25\.b}, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_6, svuint8x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u8_x2 (0, z7, 6),
+ svluti2_lane_zt_u8_x2 (0, z7, 6))
+
+/*
+** luti2_z28_z16_3:
+** luti2 {z28\.b - z29\.b}, zt0, z16\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_3, svuint8x2_t, svuint8_t, z28,
+ svluti2_lane_zt_u8_x2 (0, z16, 3),
+ svluti2_lane_zt_u8_x2 (0, z16, 3))
+
+/*
+** luti2_z24_z23_1:
+** luti2 {z24\.b - z25\.b}, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_1, svuint8x2_t, svuint8_t, z24,
+ svluti2_lane_zt_u8_x2 (0, z23, 1),
+ svluti2_lane_zt_u8_x2 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti2_z1_z0_0:
+** luti2 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint8x4_t, svuint8_t, z1,
+ svluti2_lane_zt_u8_x4 (0, z0, 0),
+ svluti2_lane_zt_u8_x4 (0, z0, 0))
+
+/*
+** luti2_z18_z5_3:
+** luti2 {[^\n]+}, zt0, z5\[3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti2_z18_z5_3, svuint8x4_t, svuint8_t, z18,
+ svluti2_lane_zt_u8_x4 (0, z5, 3),
+ svluti2_lane_zt_u8_x4 (0, z5, 3))
+
+/*
+** luti2_z24_z7_2:
+** luti2 {z24\.b - z27\.b}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z7_2, svuint8x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u8_x4 (0, z7, 2),
+ svluti2_lane_zt_u8_x4 (0, z7, 2))
+
+/*
+** luti2_z28_z16_1:
+** luti2 {z28\.b - z31\.b}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z28_z16_1, svuint8x4_t, svuint8_t, z28,
+ svluti2_lane_zt_u8_x4 (0, z16, 1),
+ svluti2_lane_zt_u8_x4 (0, z16, 1))
+
+/*
+** luti2_z24_z23_0:
+** luti2 {z24\.b - z27\.b}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti2_z24_z23_0, svuint8x4_t, svuint8_t, z24,
+ svluti2_lane_zt_u8_x4 (0, z23, 0),
+ svluti2_lane_zt_u8_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
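+/* LUTI4 consumes 4-bit table indices, so the lane immediate for the
+   single-vector form stops at 7; each case below should still be a single
+   instruction regardless of the destination register.  */
+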
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16_t, svuint8_t, z1,
+ svluti4_lane_zt_bf16 (0, z0, 0),
+ svluti4_lane_zt_bf16 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.h, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svbfloat16_t, svuint8_t, z18,
+ svluti4_lane_zt_bf16 (0, z5, 7),
+ svluti4_lane_zt_bf16 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.h, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svbfloat16_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16 (0, z7, 6),
+ svluti4_lane_zt_bf16 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.h, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svbfloat16_t, svuint8_t, z28,
+ svluti4_lane_zt_bf16 (0, z16, 4),
+ svluti4_lane_zt_bf16 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svbfloat16_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16 (0, z23, 1),
+ svluti4_lane_zt_bf16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
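+/* Same register-pair rules as the luti2 x2 tests: z18, z24 and z28 are
+   written directly and z1 goes via MOVs, with the index range reduced to
+   0-3 for the two-vector LUTI4.  */
+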
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16x2_t, svuint8_t, z1,
+ svluti4_lane_zt_bf16_x2 (0, z0, 0),
+ svluti4_lane_zt_bf16_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.h - z19\.h}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svbfloat16x2_t, svuint8_t, z18,
+ svluti4_lane_zt_bf16_x2 (0, z5, 3),
+ svluti4_lane_zt_bf16_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.h - z25\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svbfloat16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16_x2 (0, z7, 2),
+ svluti4_lane_zt_bf16_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z29\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svbfloat16x2_t, svuint8_t, z28,
+ svluti4_lane_zt_bf16_x2 (0, z16, 1),
+ svluti4_lane_zt_bf16_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z25\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svbfloat16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16_x2 (0, z23, 0),
+ svluti4_lane_zt_bf16_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
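+/* Only indices 0 and 1 are available for the four-vector LUTI4, and as in
+   the other x4 tests only the z24 and z28 tuples avoid the trailing
+   MOVs.  */
+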
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16x4_t, svuint8_t, z1,
+ svluti4_lane_zt_bf16_x4 (0, z0, 0),
+ svluti4_lane_zt_bf16_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svbfloat16x4_t, svuint8_t, z18,
+ svluti4_lane_zt_bf16_x4 (0, z5, 1),
+ svluti4_lane_zt_bf16_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.h - z27\.h}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svbfloat16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16_x4 (0, z7, 0),
+ svluti4_lane_zt_bf16_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svbfloat16x4_t, svuint8_t, z28,
+ svluti4_lane_zt_bf16_x4 (0, z16, 1),
+ svluti4_lane_zt_bf16_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svbfloat16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_bf16_x4 (0, z23, 0),
+ svluti4_lane_zt_bf16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16_t, svuint8_t, z1,
+ svluti4_lane_zt_f16 (0, z0, 0),
+ svluti4_lane_zt_f16 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.h, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svfloat16_t, svuint8_t, z18,
+ svluti4_lane_zt_f16 (0, z5, 7),
+ svluti4_lane_zt_f16 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.h, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svfloat16_t, svuint8_t, z24,
+ svluti4_lane_zt_f16 (0, z7, 6),
+ svluti4_lane_zt_f16 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.h, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svfloat16_t, svuint8_t, z28,
+ svluti4_lane_zt_f16 (0, z16, 4),
+ svluti4_lane_zt_f16 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svfloat16_t, svuint8_t, z24,
+ svluti4_lane_zt_f16 (0, z23, 1),
+ svluti4_lane_zt_f16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16x2_t, svuint8_t, z1,
+ svluti4_lane_zt_f16_x2 (0, z0, 0),
+ svluti4_lane_zt_f16_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.h - z19\.h}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svfloat16x2_t, svuint8_t, z18,
+ svluti4_lane_zt_f16_x2 (0, z5, 3),
+ svluti4_lane_zt_f16_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.h - z25\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svfloat16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_f16_x2 (0, z7, 2),
+ svluti4_lane_zt_f16_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z29\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svfloat16x2_t, svuint8_t, z28,
+ svluti4_lane_zt_f16_x2 (0, z16, 1),
+ svluti4_lane_zt_f16_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z25\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svfloat16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_f16_x2 (0, z23, 0),
+ svluti4_lane_zt_f16_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16x4_t, svuint8_t, z1,
+ svluti4_lane_zt_f16_x4 (0, z0, 0),
+ svluti4_lane_zt_f16_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svfloat16x4_t, svuint8_t, z18,
+ svluti4_lane_zt_f16_x4 (0, z5, 1),
+ svluti4_lane_zt_f16_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.h - z27\.h}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svfloat16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_f16_x4 (0, z7, 0),
+ svluti4_lane_zt_f16_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svfloat16x4_t, svuint8_t, z28,
+ svluti4_lane_zt_f16_x4 (0, z16, 1),
+ svluti4_lane_zt_f16_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svfloat16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_f16_x4 (0, z23, 0),
+ svluti4_lane_zt_f16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32_t, svuint8_t, z1,
+ svluti4_lane_zt_f32 (0, z0, 0),
+ svluti4_lane_zt_f32 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.s, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svfloat32_t, svuint8_t, z18,
+ svluti4_lane_zt_f32 (0, z5, 7),
+ svluti4_lane_zt_f32 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.s, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svfloat32_t, svuint8_t, z24,
+ svluti4_lane_zt_f32 (0, z7, 6),
+ svluti4_lane_zt_f32 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.s, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svfloat32_t, svuint8_t, z28,
+ svluti4_lane_zt_f32 (0, z16, 4),
+ svluti4_lane_zt_f32 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svfloat32_t, svuint8_t, z24,
+ svluti4_lane_zt_f32 (0, z23, 1),
+ svluti4_lane_zt_f32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32x2_t, svuint8_t, z1,
+ svluti4_lane_zt_f32_x2 (0, z0, 0),
+ svluti4_lane_zt_f32_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.s - z19\.s}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svfloat32x2_t, svuint8_t, z18,
+ svluti4_lane_zt_f32_x2 (0, z5, 3),
+ svluti4_lane_zt_f32_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.s - z25\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svfloat32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_f32_x2 (0, z7, 2),
+ svluti4_lane_zt_f32_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z29\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svfloat32x2_t, svuint8_t, z28,
+ svluti4_lane_zt_f32_x2 (0, z16, 1),
+ svluti4_lane_zt_f32_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z25\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svfloat32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_f32_x2 (0, z23, 0),
+ svluti4_lane_zt_f32_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32x4_t, svuint8_t, z1,
+ svluti4_lane_zt_f32_x4 (0, z0, 0),
+ svluti4_lane_zt_f32_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svfloat32x4_t, svuint8_t, z18,
+ svluti4_lane_zt_f32_x4 (0, z5, 1),
+ svluti4_lane_zt_f32_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.s - z27\.s}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svfloat32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_f32_x4 (0, z7, 0),
+ svluti4_lane_zt_f32_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svfloat32x4_t, svuint8_t, z28,
+ svluti4_lane_zt_f32_x4 (0, z16, 1),
+ svluti4_lane_zt_f32_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svfloat32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_f32_x4 (0, z23, 0),
+ svluti4_lane_zt_f32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint16_t, svuint8_t, z1,
+ svluti4_lane_zt_s16 (0, z0, 0),
+ svluti4_lane_zt_s16 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.h, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svint16_t, svuint8_t, z18,
+ svluti4_lane_zt_s16 (0, z5, 7),
+ svluti4_lane_zt_s16 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.h, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svint16_t, svuint8_t, z24,
+ svluti4_lane_zt_s16 (0, z7, 6),
+ svluti4_lane_zt_s16 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.h, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svint16_t, svuint8_t, z28,
+ svluti4_lane_zt_s16 (0, z16, 4),
+ svluti4_lane_zt_s16 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svint16_t, svuint8_t, z24,
+ svluti4_lane_zt_s16 (0, z23, 1),
+ svluti4_lane_zt_s16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint16x2_t, svuint8_t, z1,
+ svluti4_lane_zt_s16_x2 (0, z0, 0),
+ svluti4_lane_zt_s16_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.h - z19\.h}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svint16x2_t, svuint8_t, z18,
+ svluti4_lane_zt_s16_x2 (0, z5, 3),
+ svluti4_lane_zt_s16_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.h - z25\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svint16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s16_x2 (0, z7, 2),
+ svluti4_lane_zt_s16_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z29\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svint16x2_t, svuint8_t, z28,
+ svluti4_lane_zt_s16_x2 (0, z16, 1),
+ svluti4_lane_zt_s16_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z25\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svint16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s16_x2 (0, z23, 0),
+ svluti4_lane_zt_s16_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint16x4_t, svuint8_t, z1,
+ svluti4_lane_zt_s16_x4 (0, z0, 0),
+ svluti4_lane_zt_s16_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svint16x4_t, svuint8_t, z18,
+ svluti4_lane_zt_s16_x4 (0, z5, 1),
+ svluti4_lane_zt_s16_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.h - z27\.h}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svint16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_s16_x4 (0, z7, 0),
+ svluti4_lane_zt_s16_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svint16x4_t, svuint8_t, z28,
+ svluti4_lane_zt_s16_x4 (0, z16, 1),
+ svluti4_lane_zt_s16_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svint16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_s16_x4 (0, z23, 0),
+ svluti4_lane_zt_s16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint32_t, svuint8_t, z1,
+ svluti4_lane_zt_s32 (0, z0, 0),
+ svluti4_lane_zt_s32 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.s, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svint32_t, svuint8_t, z18,
+ svluti4_lane_zt_s32 (0, z5, 7),
+ svluti4_lane_zt_s32 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.s, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svint32_t, svuint8_t, z24,
+ svluti4_lane_zt_s32 (0, z7, 6),
+ svluti4_lane_zt_s32 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.s, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svint32_t, svuint8_t, z28,
+ svluti4_lane_zt_s32 (0, z16, 4),
+ svluti4_lane_zt_s32 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svint32_t, svuint8_t, z24,
+ svluti4_lane_zt_s32 (0, z23, 1),
+ svluti4_lane_zt_s32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint32x2_t, svuint8_t, z1,
+ svluti4_lane_zt_s32_x2 (0, z0, 0),
+ svluti4_lane_zt_s32_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.s - z19\.s}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svint32x2_t, svuint8_t, z18,
+ svluti4_lane_zt_s32_x2 (0, z5, 3),
+ svluti4_lane_zt_s32_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.s - z25\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svint32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s32_x2 (0, z7, 2),
+ svluti4_lane_zt_s32_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z29\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svint32x2_t, svuint8_t, z28,
+ svluti4_lane_zt_s32_x2 (0, z16, 1),
+ svluti4_lane_zt_s32_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z25\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svint32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s32_x2 (0, z23, 0),
+ svluti4_lane_zt_s32_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint32x4_t, svuint8_t, z1,
+ svluti4_lane_zt_s32_x4 (0, z0, 0),
+ svluti4_lane_zt_s32_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svint32x4_t, svuint8_t, z18,
+ svluti4_lane_zt_s32_x4 (0, z5, 1),
+ svluti4_lane_zt_s32_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.s - z27\.s}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svint32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_s32_x4 (0, z7, 0),
+ svluti4_lane_zt_s32_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svint32x4_t, svuint8_t, z28,
+ svluti4_lane_zt_s32_x4 (0, z16, 1),
+ svluti4_lane_zt_s32_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svint32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_s32_x4 (0, z23, 0),
+ svluti4_lane_zt_s32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.b, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint8_t, svuint8_t, z1,
+ svluti4_lane_zt_s8 (0, z0, 0),
+ svluti4_lane_zt_s8 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.b, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svint8_t, svuint8_t, z18,
+ svluti4_lane_zt_s8 (0, z5, 7),
+ svluti4_lane_zt_s8 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.b, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svint8_t, svuint8_t, z24,
+ svluti4_lane_zt_s8 (0, z7, 6),
+ svluti4_lane_zt_s8 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.b, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svint8_t, svuint8_t, z28,
+ svluti4_lane_zt_s8 (0, z16, 4),
+ svluti4_lane_zt_s8 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.b, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svint8_t, svuint8_t, z24,
+ svluti4_lane_zt_s8 (0, z23, 1),
+ svluti4_lane_zt_s8 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svint8x2_t, svuint8_t, z1,
+ svluti4_lane_zt_s8_x2 (0, z0, 0),
+ svluti4_lane_zt_s8_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.b - z19\.b}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svint8x2_t, svuint8_t, z18,
+ svluti4_lane_zt_s8_x2 (0, z5, 3),
+ svluti4_lane_zt_s8_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.b - z25\.b}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svint8x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s8_x2 (0, z7, 2),
+ svluti4_lane_zt_s8_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.b - z29\.b}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svint8x2_t, svuint8_t, z28,
+ svluti4_lane_zt_s8_x2 (0, z16, 1),
+ svluti4_lane_zt_s8_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.b - z25\.b}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svint8x2_t, svuint8_t, z24,
+ svluti4_lane_zt_s8_x2 (0, z23, 0),
+ svluti4_lane_zt_s8_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.h, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint16_t, svuint8_t, z1,
+ svluti4_lane_zt_u16 (0, z0, 0),
+ svluti4_lane_zt_u16 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.h, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svuint16_t, svuint8_t, z18,
+ svluti4_lane_zt_u16 (0, z5, 7),
+ svluti4_lane_zt_u16 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.h, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svuint16_t, svuint8_t, z24,
+ svluti4_lane_zt_u16 (0, z7, 6),
+ svluti4_lane_zt_u16 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.h, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svuint16_t, svuint8_t, z28,
+ svluti4_lane_zt_u16 (0, z16, 4),
+ svluti4_lane_zt_u16 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.h, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svuint16_t, svuint8_t, z24,
+ svluti4_lane_zt_u16 (0, z23, 1),
+ svluti4_lane_zt_u16 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint16x2_t, svuint8_t, z1,
+ svluti4_lane_zt_u16_x2 (0, z0, 0),
+ svluti4_lane_zt_u16_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.h - z19\.h}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svuint16x2_t, svuint8_t, z18,
+ svluti4_lane_zt_u16_x2 (0, z5, 3),
+ svluti4_lane_zt_u16_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.h - z25\.h}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svuint16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u16_x2 (0, z7, 2),
+ svluti4_lane_zt_u16_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z29\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svuint16x2_t, svuint8_t, z28,
+ svluti4_lane_zt_u16_x2 (0, z16, 1),
+ svluti4_lane_zt_u16_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z25\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svuint16x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u16_x2 (0, z23, 0),
+ svluti4_lane_zt_u16_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint16x4_t, svuint8_t, z1,
+ svluti4_lane_zt_u16_x4 (0, z0, 0),
+ svluti4_lane_zt_u16_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svuint16x4_t, svuint8_t, z18,
+ svluti4_lane_zt_u16_x4 (0, z5, 1),
+ svluti4_lane_zt_u16_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.h - z27\.h}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svuint16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_u16_x4 (0, z7, 0),
+ svluti4_lane_zt_u16_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.h - z31\.h}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svuint16x4_t, svuint8_t, z28,
+ svluti4_lane_zt_u16_x4 (0, z16, 1),
+ svluti4_lane_zt_u16_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.h - z27\.h}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svuint16x4_t, svuint8_t, z24,
+ svluti4_lane_zt_u16_x4 (0, z23, 0),
+ svluti4_lane_zt_u16_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.s, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint32_t, svuint8_t, z1,
+ svluti4_lane_zt_u32 (0, z0, 0),
+ svluti4_lane_zt_u32 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.s, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svuint32_t, svuint8_t, z18,
+ svluti4_lane_zt_u32 (0, z5, 7),
+ svluti4_lane_zt_u32 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.s, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svuint32_t, svuint8_t, z24,
+ svluti4_lane_zt_u32 (0, z7, 6),
+ svluti4_lane_zt_u32 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.s, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svuint32_t, svuint8_t, z28,
+ svluti4_lane_zt_u32 (0, z16, 4),
+ svluti4_lane_zt_u32 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.s, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svuint32_t, svuint8_t, z24,
+ svluti4_lane_zt_u32 (0, z23, 1),
+ svluti4_lane_zt_u32 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint32x2_t, svuint8_t, z1,
+ svluti4_lane_zt_u32_x2 (0, z0, 0),
+ svluti4_lane_zt_u32_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.s - z19\.s}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svuint32x2_t, svuint8_t, z18,
+ svluti4_lane_zt_u32_x2 (0, z5, 3),
+ svluti4_lane_zt_u32_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.s - z25\.s}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svuint32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u32_x2 (0, z7, 2),
+ svluti4_lane_zt_u32_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z29\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svuint32x2_t, svuint8_t, z28,
+ svluti4_lane_zt_u32_x2 (0, z16, 1),
+ svluti4_lane_zt_u32_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z25\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svuint32x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u32_x2 (0, z23, 0),
+ svluti4_lane_zt_u32_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint32x4_t, svuint8_t, z1,
+ svluti4_lane_zt_u32_x4 (0, z0, 0),
+ svluti4_lane_zt_u32_x4 (0, z0, 0))
+
+/*
+** luti4_z18_z5_1:
+** luti4 {[^\n]+}, zt0, z5\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_1, svuint32x4_t, svuint8_t, z18,
+ svluti4_lane_zt_u32_x4 (0, z5, 1),
+ svluti4_lane_zt_u32_x4 (0, z5, 1))
+
+/*
+** luti4_z24_z7_0:
+** luti4 {z24\.s - z27\.s}, zt0, z7\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_0, svuint32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_u32_x4 (0, z7, 0),
+ svluti4_lane_zt_u32_x4 (0, z7, 0))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.s - z31\.s}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svuint32x4_t, svuint8_t, z28,
+ svluti4_lane_zt_u32_x4 (0, z16, 1),
+ svluti4_lane_zt_u32_x4 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.s - z27\.s}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svuint32x4_t, svuint8_t, z24,
+ svluti4_lane_zt_u32_x4 (0, z23, 0),
+ svluti4_lane_zt_u32_x4 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 z1\.b, zt0, z0\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint8_t, svuint8_t, z1,
+ svluti4_lane_zt_u8 (0, z0, 0),
+ svluti4_lane_zt_u8 (0, z0, 0))
+
+/*
+** luti4_z18_z5_7:
+** luti4 z18\.b, zt0, z5\[7\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_7, svuint8_t, svuint8_t, z18,
+ svluti4_lane_zt_u8 (0, z5, 7),
+ svluti4_lane_zt_u8 (0, z5, 7))
+
+/*
+** luti4_z24_z7_6:
+** luti4 z24\.b, zt0, z7\[6\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_6, svuint8_t, svuint8_t, z24,
+ svluti4_lane_zt_u8 (0, z7, 6),
+ svluti4_lane_zt_u8 (0, z7, 6))
+
+/*
+** luti4_z28_z16_4:
+** luti4 z28\.b, zt0, z16\[4\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_4, svuint8_t, svuint8_t, z28,
+ svluti4_lane_zt_u8 (0, z16, 4),
+ svluti4_lane_zt_u8 (0, z16, 4))
+
+/*
+** luti4_z24_z23_1:
+** luti4 z24\.b, zt0, z23\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_1, svuint8_t, svuint8_t, z24,
+ svluti4_lane_zt_u8 (0, z23, 1),
+ svluti4_lane_zt_u8 (0, z23, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** luti4_z1_z0_0:
+** luti4 {[^\n]+}, zt0, z0\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (luti4_z1_z0_0, svuint8x2_t, svuint8_t, z1,
+ svluti4_lane_zt_u8_x2 (0, z0, 0),
+ svluti4_lane_zt_u8_x2 (0, z0, 0))
+
+/*
+** luti4_z18_z5_3:
+** luti4 {z18\.b - z19\.b}, zt0, z5\[3\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z18_z5_3, svuint8x2_t, svuint8_t, z18,
+ svluti4_lane_zt_u8_x2 (0, z5, 3),
+ svluti4_lane_zt_u8_x2 (0, z5, 3))
+
+/*
+** luti4_z24_z7_2:
+** luti4 {z24\.b - z25\.b}, zt0, z7\[2\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z7_2, svuint8x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u8_x2 (0, z7, 2),
+ svluti4_lane_zt_u8_x2 (0, z7, 2))
+
+/*
+** luti4_z28_z16_1:
+** luti4 {z28\.b - z29\.b}, zt0, z16\[1\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z28_z16_1, svuint8x2_t, svuint8_t, z28,
+ svluti4_lane_zt_u8_x2 (0, z16, 1),
+ svluti4_lane_zt_u8_x2 (0, z16, 1))
+
+/*
+** luti4_z24_z23_0:
+** luti4 {z24\.b - z25\.b}, zt0, z23\[0\]
+** ret
+*/
+TEST_XN_SINGLE (luti4_z24_z23_0, svuint8x2_t, svuint8_t, z24,
+ svluti4_lane_zt_u8_x2 (0, z23, 0),
+ svluti4_lane_zt_u8_x2 (0, z23, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
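+/* Beyond the simple tied cases, these tests cover the awkward register
+   allocations: a result in z23 (which cannot start a pair) is expected to
+   be moved out of a legal pair, and the _single forms require the lone
+   vector operand to be in z0-z15, so z15 is used as-is while z16 is first
+   copied to a low register.  */
+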
+/*
+** max_z0_z0_z4:
+** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat16x2_t, z0,
+ svmax_f16_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat16x2_t, z0,
+ svmax_f16_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.h - z29\.h}
+** |
+** fmax [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat16x2_t, z0,
+ svmax_f16_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** fmax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat16x2_t, z18,
+ svmax_f16_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svfloat16x2_t, z23,
+ svmax_f16_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat16x2_t, z28,
+ svmax_f16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat16x2_t, z0,
+ svmax_f16_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** fmax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat16x2_t, z4,
+ svmax_f16_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmax_single_f16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** fmax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmax_single_f16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmax_single_f16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmax_single_f16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmax_single_f16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** fmax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18,
+ svmax_single_f16_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svmax_single_f16_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat16x2_t, svfloat16_t,
+ z0 = svmax_single_f16_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24,
+ svmax_single_f16_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
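+/* The x4 variants repeat the x2 checks with quad register groups: only
+   tuples starting at a multiple of four (z0, z4, z24, z28) are operated
+   on in place, so the z18 and z23 cases are expected to be surrounded by
+   MOVs.  */
+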
+/*
+** max_z0_z0_z4:
+** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat16x4_t, z0,
+ svmax_f16_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat16x4_t, z0,
+ svmax_f16_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.h - z31\.h}
+** |
+** fmax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat16x4_t, z0,
+ svmax_f16_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat16x4_t, z18,
+ svmax_f16_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svfloat16x4_t, z23,
+ svmax_f16_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat16x4_t, z28,
+ svmax_f16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat16x4_t, z0,
+ svmax_f16_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fmax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat16x4_t, z4,
+ svmax_f16_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmax_single_f16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** fmax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmax_single_f16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmax_single_f16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmax_single_f16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmax_single_f16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18,
+ svmax_single_f16_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svmax_single_f16_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat16x4_t, svfloat16_t,
+ z0 = svmax_single_f16_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24,
+ svmax_single_f16_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat32x2_t, z0,
+ svmax_f32_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat32x2_t, z0,
+ svmax_f32_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.s - z29\.s}
+** |
+** fmax [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat32x2_t, z0,
+ svmax_f32_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** fmax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat32x2_t, z18,
+ svmax_f32_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svfloat32x2_t, z23,
+ svmax_f32_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat32x2_t, z28,
+ svmax_f32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat32x2_t, z0,
+ svmax_f32_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** fmax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat32x2_t, z4,
+ svmax_f32_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmax_single_f32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** fmax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmax_single_f32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmax_single_f32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmax_single_f32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmax_single_f32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** fmax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18,
+ svmax_single_f32_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svmax_single_f32_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat32x2_t, svfloat32_t,
+ z0 = svmax_single_f32_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24,
+ svmax_single_f32_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat32x4_t, z0,
+ svmax_f32_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat32x4_t, z0,
+ svmax_f32_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.s - z31\.s}
+** |
+** fmax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat32x4_t, z0,
+ svmax_f32_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat32x4_t, z18,
+ svmax_f32_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svfloat32x4_t, z23,
+ svmax_f32_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat32x4_t, z28,
+ svmax_f32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat32x4_t, z0,
+ svmax_f32_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fmax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat32x4_t, z4,
+ svmax_f32_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmax_single_f32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** fmax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmax_single_f32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmax_single_f32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmax_single_f32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmax_single_f32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18,
+ svmax_single_f32_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svmax_single_f32_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat32x4_t, svfloat32_t,
+ z0 = svmax_single_f32_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24,
+ svmax_single_f32_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat64x2_t, z0,
+ svmax_f64_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat64x2_t, z0,
+ svmax_f64_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.d - z29\.d}
+** |
+** fmax [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat64x2_t, z0,
+ svmax_f64_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** fmax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat64x2_t, z18,
+ svmax_f64_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svfloat64x2_t, z23,
+ svmax_f64_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat64x2_t, z28,
+ svmax_f64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat64x2_t, z0,
+ svmax_f64_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** fmax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat64x2_t, z4,
+ svmax_f64_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmax_single_f64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** fmax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmax_single_f64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmax_single_f64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmax_single_f64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmax_single_f64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** fmax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18,
+ svmax_single_f64_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svmax_single_f64_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat64x2_t, svfloat64_t,
+ z0 = svmax_single_f64_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24,
+ svmax_single_f64_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svfloat64x4_t, z0,
+ svmax_f64_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svfloat64x4_t, z0,
+ svmax_f64_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.d - z31\.d}
+** |
+** fmax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svfloat64x4_t, z0,
+ svmax_f64_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svfloat64x4_t, z18,
+ svmax_f64_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svfloat64x4_t, z23,
+ svmax_f64_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** fmax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svfloat64x4_t, z28,
+ svmax_f64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svfloat64x4_t, z0,
+ svmax_f64_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fmax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svfloat64x4_t, z4,
+ svmax_f64_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmax_single_f64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** fmax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmax_single_f64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmax_single_f64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmax_single_f64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmax_single_f64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmax [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18,
+ svmax_single_f64_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svmax_single_f64_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svmax_single_f64_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svmax_single_f64_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint16x2_t, z0,
+ svmax_s16_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint16x2_t, z0,
+ svmax_s16_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.h - z29\.h}
+** |
+** smax [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint16x2_t, z0,
+ svmax_s16_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** smax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint16x2_t, z18,
+ svmax_s16_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svint16x2_t, z23,
+ svmax_s16_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint16x2_t, z28,
+ svmax_s16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint16x2_t, z0,
+ svmax_s16_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** smax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint16x2_t, z4,
+ svmax_s16_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint16x2_t, svint16_t, z24,
+ svmax_single_s16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** smax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint16x2_t, svint16_t, z24,
+ svmax_single_s16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint16x2_t, svint16_t, z24,
+ svmax_single_s16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint16x2_t, svint16_t, z1,
+ svmax_single_s16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint16x2_t, svint16_t, z1,
+ svmax_single_s16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** smax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint16x2_t, svint16_t, z18,
+ svmax_single_s16_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint16x2_t, svint16_t,
+ z0_res = svmax_single_s16_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint16x2_t, svint16_t,
+ z0 = svmax_single_s16_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint16x2_t, svint16_t, z24,
+ svmax_single_s16_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint16x4_t, z0,
+ svmax_s16_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint16x4_t, z0,
+ svmax_s16_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.h - z31\.h}
+** |
+** smax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint16x4_t, z0,
+ svmax_s16_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint16x4_t, z18,
+ svmax_s16_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svint16x4_t, z23,
+ svmax_s16_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint16x4_t, z28,
+ svmax_s16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint16x4_t, z0,
+ svmax_s16_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** smax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint16x4_t, z4,
+ svmax_s16_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint16x4_t, svint16_t, z24,
+ svmax_single_s16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** smax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint16x4_t, svint16_t, z24,
+ svmax_single_s16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint16x4_t, svint16_t, z24,
+ svmax_single_s16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint16x4_t, svint16_t, z1,
+ svmax_single_s16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint16x4_t, svint16_t, z1,
+ svmax_single_s16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint16x4_t, svint16_t, z18,
+ svmax_single_s16_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint16x4_t, svint16_t,
+ z0_res = svmax_single_s16_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint16x4_t, svint16_t,
+ z0 = svmax_single_s16_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint16x4_t, svint16_t, z24,
+ svmax_single_s16_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint32x2_t, z0,
+ svmax_s32_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint32x2_t, z0,
+ svmax_s32_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.s - z29\.s}
+** |
+** smax [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint32x2_t, z0,
+ svmax_s32_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** smax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint32x2_t, z18,
+ svmax_s32_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svint32x2_t, z23,
+ svmax_s32_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint32x2_t, z28,
+ svmax_s32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint32x2_t, z0,
+ svmax_s32_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** smax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint32x2_t, z4,
+ svmax_s32_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint32x2_t, svint32_t, z24,
+ svmax_single_s32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** smax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint32x2_t, svint32_t, z24,
+ svmax_single_s32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint32x2_t, svint32_t, z24,
+ svmax_single_s32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint32x2_t, svint32_t, z1,
+ svmax_single_s32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint32x2_t, svint32_t, z1,
+ svmax_single_s32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** smax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint32x2_t, svint32_t, z18,
+ svmax_single_s32_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint32x2_t, svint32_t,
+ z0_res = svmax_single_s32_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint32x2_t, svint32_t,
+ z0 = svmax_single_s32_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint32x2_t, svint32_t, z24,
+ svmax_single_s32_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint32x4_t, z0,
+ svmax_s32_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint32x4_t, z0,
+ svmax_s32_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.s - z31\.s}
+** |
+** smax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint32x4_t, z0,
+ svmax_s32_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint32x4_t, z18,
+ svmax_s32_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svint32x4_t, z23,
+ svmax_s32_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint32x4_t, z28,
+ svmax_s32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint32x4_t, z0,
+ svmax_s32_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** smax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint32x4_t, z4,
+ svmax_s32_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint32x4_t, svint32_t, z24,
+ svmax_single_s32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** smax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint32x4_t, svint32_t, z24,
+ svmax_single_s32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint32x4_t, svint32_t, z24,
+ svmax_single_s32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint32x4_t, svint32_t, z1,
+ svmax_single_s32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint32x4_t, svint32_t, z1,
+ svmax_single_s32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint32x4_t, svint32_t, z18,
+ svmax_single_s32_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint32x4_t, svint32_t,
+ z0_res = svmax_single_s32_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint32x4_t, svint32_t,
+ z0 = svmax_single_s32_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint32x4_t, svint32_t, z24,
+ svmax_single_s32_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint64x2_t, z0,
+ svmax_s64_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint64x2_t, z0,
+ svmax_s64_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.d - z29\.d}
+** |
+** smax [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint64x2_t, z0,
+ svmax_s64_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** smax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint64x2_t, z18,
+ svmax_s64_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svint64x2_t, z23,
+ svmax_s64_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint64x2_t, z28,
+ svmax_s64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint64x2_t, z0,
+ svmax_s64_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** smax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint64x2_t, z4,
+ svmax_s64_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint64x2_t, svint64_t, z24,
+ svmax_single_s64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** smax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint64x2_t, svint64_t, z24,
+ svmax_single_s64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint64x2_t, svint64_t, z24,
+ svmax_single_s64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint64x2_t, svint64_t, z1,
+ svmax_single_s64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint64x2_t, svint64_t, z1,
+ svmax_single_s64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** smax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint64x2_t, svint64_t, z18,
+ svmax_single_s64_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint64x2_t, svint64_t,
+ z0_res = svmax_single_s64_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint64x2_t, svint64_t,
+ z0 = svmax_single_s64_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint64x2_t, svint64_t, z24,
+ svmax_single_s64_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint64x4_t, z0,
+ svmax_s64_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint64x4_t, z0,
+ svmax_s64_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.d - z31\.d}
+** |
+** smax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint64x4_t, z0,
+ svmax_s64_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint64x4_t, z18,
+ svmax_s64_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svint64x4_t, z23,
+ svmax_s64_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint64x4_t, z28,
+ svmax_s64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint64x4_t, z0,
+ svmax_s64_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** smax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint64x4_t, z4,
+ svmax_s64_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint64x4_t, svint64_t, z24,
+ svmax_single_s64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** smax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint64x4_t, svint64_t, z24,
+ svmax_single_s64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint64x4_t, svint64_t, z24,
+ svmax_single_s64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint64x4_t, svint64_t, z1,
+ svmax_single_s64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint64x4_t, svint64_t, z1,
+ svmax_single_s64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint64x4_t, svint64_t, z18,
+ svmax_single_s64_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint64x4_t, svint64_t,
+ z0_res = svmax_single_s64_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint64x4_t, svint64_t,
+ z0 = svmax_single_s64_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint64x4_t, svint64_t, z24,
+ svmax_single_s64_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint8x2_t, z0,
+ svmax_s8_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint8x2_t, z0,
+ svmax_s8_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.b - z29\.b}
+** |
+** smax [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint8x2_t, z0,
+ svmax_s8_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** smax {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint8x2_t, z18,
+ svmax_s8_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svint8x2_t, z23,
+ svmax_s8_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint8x2_t, z28,
+ svmax_s8_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint8x2_t, z0,
+ svmax_s8_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** smax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint8x2_t, z4,
+ svmax_s8_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint8x2_t, svint8_t, z24,
+ svmax_single_s8_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** smax {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint8x2_t, svint8_t, z24,
+ svmax_single_s8_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint8x2_t, svint8_t, z24,
+ svmax_single_s8_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint8x2_t, svint8_t, z1,
+ svmax_single_s8_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint8x2_t, svint8_t, z1,
+ svmax_single_s8_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** smax {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint8x2_t, svint8_t, z18,
+ svmax_single_s8_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint8x2_t, svint8_t,
+ z0_res = svmax_single_s8_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint8x2_t, svint8_t,
+ z0 = svmax_single_s8_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint8x2_t, svint8_t, z24,
+ svmax_single_s8_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svint8x4_t, z0,
+ svmax_s8_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svint8x4_t, z0,
+ svmax_s8_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.b - z31\.b}
+** |
+** smax [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svint8x4_t, z0,
+ svmax_s8_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svint8x4_t, z18,
+ svmax_s8_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svint8x4_t, z23,
+ svmax_s8_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** smax {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svint8x4_t, z28,
+ svmax_s8_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svint8x4_t, z0,
+ svmax_s8_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** smax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svint8x4_t, z4,
+ svmax_s8_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svint8x4_t, svint8_t, z24,
+ svmax_single_s8_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** smax {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svint8x4_t, svint8_t, z24,
+ svmax_single_s8_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svint8x4_t, svint8_t, z24,
+ svmax_single_s8_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svint8x4_t, svint8_t, z1,
+ svmax_single_s8_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svint8x4_t, svint8_t, z1,
+ svmax_single_s8_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smax [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svint8x4_t, svint8_t, z18,
+ svmax_single_s8_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint8x4_t, svint8_t,
+ z0_res = svmax_single_s8_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint8x4_t, svint8_t,
+ z0 = svmax_single_s8_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svint8x4_t, svint8_t, z24,
+ svmax_single_s8_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint16x2_t, z0,
+ svmax_u16_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint16x2_t, z0,
+ svmax_u16_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.h - z29\.h}
+** |
+** umax [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint16x2_t, z0,
+ svmax_u16_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** umax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint16x2_t, z18,
+ svmax_u16_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svuint16x2_t, z23,
+ svmax_u16_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint16x2_t, z28,
+ svmax_u16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint16x2_t, z0,
+ svmax_u16_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** umax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint16x2_t, z4,
+ svmax_u16_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint16x2_t, svuint16_t, z24,
+ svmax_single_u16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** umax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint16x2_t, svuint16_t, z24,
+ svmax_single_u16_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint16x2_t, svuint16_t, z24,
+ svmax_single_u16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint16x2_t, svuint16_t, z1,
+ svmax_single_u16_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint16x2_t, svuint16_t, z1,
+ svmax_single_u16_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** umax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint16x2_t, svuint16_t, z18,
+ svmax_single_u16_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint16x2_t, svuint16_t,
+ z0_res = svmax_single_u16_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint16x2_t, svuint16_t,
+ z0 = svmax_single_u16_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint16x2_t, svuint16_t, z24,
+ svmax_single_u16_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint16x4_t, z0,
+ svmax_u16_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint16x4_t, z0,
+ svmax_u16_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.h - z31\.h}
+** |
+** umax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint16x4_t, z0,
+ svmax_u16_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint16x4_t, z18,
+ svmax_u16_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svuint16x4_t, z23,
+ svmax_u16_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint16x4_t, z28,
+ svmax_u16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint16x4_t, z0,
+ svmax_u16_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** umax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint16x4_t, z4,
+ svmax_u16_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint16x4_t, svuint16_t, z24,
+ svmax_single_u16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** umax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint16x4_t, svuint16_t, z24,
+ svmax_single_u16_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint16x4_t, svuint16_t, z24,
+ svmax_single_u16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint16x4_t, svuint16_t, z1,
+ svmax_single_u16_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint16x4_t, svuint16_t, z1,
+ svmax_single_u16_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint16x4_t, svuint16_t, z18,
+ svmax_single_u16_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint16x4_t, svuint16_t,
+ z0_res = svmax_single_u16_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint16x4_t, svuint16_t,
+ z0 = svmax_single_u16_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint16x4_t, svuint16_t, z24,
+ svmax_single_u16_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint32x2_t, z0,
+ svmax_u32_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint32x2_t, z0,
+ svmax_u32_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.s - z29\.s}
+** |
+** umax [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint32x2_t, z0,
+ svmax_u32_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** umax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint32x2_t, z18,
+ svmax_u32_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svuint32x2_t, z23,
+ svmax_u32_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint32x2_t, z28,
+ svmax_u32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint32x2_t, z0,
+ svmax_u32_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** umax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint32x2_t, z4,
+ svmax_u32_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint32x2_t, svuint32_t, z24,
+ svmax_single_u32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** umax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint32x2_t, svuint32_t, z24,
+ svmax_single_u32_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint32x2_t, svuint32_t, z24,
+ svmax_single_u32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint32x2_t, svuint32_t, z1,
+ svmax_single_u32_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint32x2_t, svuint32_t, z1,
+ svmax_single_u32_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** umax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint32x2_t, svuint32_t, z18,
+ svmax_single_u32_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint32x2_t, svuint32_t,
+ z0_res = svmax_single_u32_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint32x2_t, svuint32_t,
+ z0 = svmax_single_u32_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint32x2_t, svuint32_t, z24,
+ svmax_single_u32_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint32x4_t, z0,
+ svmax_u32_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint32x4_t, z0,
+ svmax_u32_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.s - z31\.s}
+** |
+** umax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint32x4_t, z0,
+ svmax_u32_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint32x4_t, z18,
+ svmax_u32_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svuint32x4_t, z23,
+ svmax_u32_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint32x4_t, z28,
+ svmax_u32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint32x4_t, z0,
+ svmax_u32_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** umax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint32x4_t, z4,
+ svmax_u32_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint32x4_t, svuint32_t, z24,
+ svmax_single_u32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** umax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint32x4_t, svuint32_t, z24,
+ svmax_single_u32_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint32x4_t, svuint32_t, z24,
+ svmax_single_u32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint32x4_t, svuint32_t, z1,
+ svmax_single_u32_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint32x4_t, svuint32_t, z1,
+ svmax_single_u32_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint32x4_t, svuint32_t, z18,
+ svmax_single_u32_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint32x4_t, svuint32_t,
+ z0_res = svmax_single_u32_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint32x4_t, svuint32_t,
+ z0 = svmax_single_u32_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint32x4_t, svuint32_t, z24,
+ svmax_single_u32_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint64x2_t, z0,
+ svmax_u64_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint64x2_t, z0,
+ svmax_u64_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.d - z29\.d}
+** |
+** umax [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint64x2_t, z0,
+ svmax_u64_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** umax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint64x2_t, z18,
+ svmax_u64_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svuint64x2_t, z23,
+ svmax_u64_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint64x2_t, z28,
+ svmax_u64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint64x2_t, z0,
+ svmax_u64_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** umax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint64x2_t, z4,
+ svmax_u64_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint64x2_t, svuint64_t, z24,
+ svmax_single_u64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** umax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint64x2_t, svuint64_t, z24,
+ svmax_single_u64_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint64x2_t, svuint64_t, z24,
+ svmax_single_u64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint64x2_t, svuint64_t, z1,
+ svmax_single_u64_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint64x2_t, svuint64_t, z1,
+ svmax_single_u64_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** umax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint64x2_t, svuint64_t, z18,
+ svmax_single_u64_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint64x2_t, svuint64_t,
+ z0_res = svmax_single_u64_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint64x2_t, svuint64_t,
+ z0 = svmax_single_u64_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint64x2_t, svuint64_t, z24,
+ svmax_single_u64_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint64x4_t, z0,
+ svmax_u64_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint64x4_t, z0,
+ svmax_u64_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.d - z31\.d}
+** |
+** umax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint64x4_t, z0,
+ svmax_u64_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint64x4_t, z18,
+ svmax_u64_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svuint64x4_t, z23,
+ svmax_u64_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint64x4_t, z28,
+ svmax_u64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint64x4_t, z0,
+ svmax_u64_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** umax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint64x4_t, z4,
+ svmax_u64_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint64x4_t, svuint64_t, z24,
+ svmax_single_u64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** umax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint64x4_t, svuint64_t, z24,
+ svmax_single_u64_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint64x4_t, svuint64_t, z24,
+ svmax_single_u64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint64x4_t, svuint64_t, z1,
+ svmax_single_u64_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint64x4_t, svuint64_t, z1,
+ svmax_single_u64_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint64x4_t, svuint64_t, z18,
+ svmax_single_u64_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint64x4_t, svuint64_t,
+ z0_res = svmax_single_u64_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint64x4_t, svuint64_t,
+ z0 = svmax_single_u64_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint64x4_t, svuint64_t, z24,
+ svmax_single_u64_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint8x2_t, z0,
+ svmax_u8_x2 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint8x2_t, z0,
+ svmax_u8_x2 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.b - z29\.b}
+** |
+** umax [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint8x2_t, z0,
+ svmax_u8_x2 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** umax {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint8x2_t, z18,
+ svmax_u8_x2 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z18, svuint8x2_t, z23,
+ svmax_u8_x2 (z23, z18),
+ svmax (z23, z18))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint8x2_t, z28,
+ svmax_u8_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint8x2_t, z0,
+ svmax_u8_x2 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** umax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint8x2_t, z4,
+ svmax_u8_x2 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint8x2_t, svuint8_t, z24,
+ svmax_single_u8_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** umax {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint8x2_t, svuint8_t, z24,
+ svmax_single_u8_x2 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint8x2_t, svuint8_t, z24,
+ svmax_single_u8_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint8x2_t, svuint8_t, z1,
+ svmax_single_u8_x2 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint8x2_t, svuint8_t, z1,
+ svmax_single_u8_x2 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** umax {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint8x2_t, svuint8_t, z18,
+ svmax_single_u8_x2 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint8x2_t, svuint8_t,
+ z0_res = svmax_single_u8_x2 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint8x2_t, svuint8_t,
+ z0 = svmax_single_u8_x2 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint8x2_t, svuint8_t, z24,
+ svmax_single_u8_x2 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** max_z0_z0_z4:
+** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (max_z0_z0_z4, svuint8x4_t, z0,
+ svmax_u8_x4 (z0, z4),
+ svmax (z0, z4))
+
+/*
+** max_z0_z4_z0:
+** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (max_z0_z4_z0, svuint8x4_t, z0,
+ svmax_u8_x4 (z4, z0),
+ svmax (z4, z0))
+
+/*
+** max_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.b - z31\.b}
+** |
+** umax [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z4_z28, svuint8x4_t, z0,
+ svmax_u8_x4 (z4, z28),
+ svmax (z4, z28))
+
+/*
+** max_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z18_z18_z4, svuint8x4_t, z18,
+ svmax_u8_x4 (z18, z4),
+ svmax (z18, z4))
+
+/*
+** max_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (max_z23_z23_z28, svuint8x4_t, z23,
+ svmax_u8_x4 (z23, z28),
+ svmax (z23, z28))
+
+/*
+** max_z28_z28_z0:
+** umax {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (max_z28_z28_z0, svuint8x4_t, z28,
+ svmax_u8_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z0_z0_z18, svuint8x4_t, z0,
+ svmax_u8_x4 (z0, z18),
+ svmax (z0, z18))
+
+/*
+** max_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** umax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (max_z4_z4_z23, svuint8x4_t, z4,
+ svmax_u8_x4 (z4, z23),
+ svmax (z4, z23))
+
+/*
+** max_single_z24_z24_z0:
+** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z0, svuint8x4_t, svuint8_t, z24,
+ svmax_single_u8_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** umax {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z28_z0, svuint8x4_t, svuint8_t, z24,
+ svmax_single_u8_x4 (z28, z0),
+ svmax (z28, z0))
+
+/*
+** max_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z1_z0, svuint8x4_t, svuint8_t, z24,
+ svmax_single_u8_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z1_z24_z0:
+** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z24_z0, svuint8x4_t, svuint8_t, z1,
+ svmax_single_u8_x4 (z24, z0),
+ svmax (z24, z0))
+
+/*
+** max_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z1_z1_z0, svuint8x4_t, svuint8_t, z1,
+ svmax_single_u8_x4 (z1, z0),
+ svmax (z1, z0))
+
+/*
+** max_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umax [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (max_single_z18_z18_z0, svuint8x4_t, svuint8_t, z18,
+ svmax_single_u8_x4 (z18, z0),
+ svmax (z18, z0))
+
+/*
+** max_single_awkward:
+** ...
+** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint8x4_t, svuint8_t,
+ z0_res = svmax_single_u8_x4 (z1, z0),
+ z0_res = svmax (z1, z0))
+
+/*
+** max_single_z0_z0_z15:
+** ...
+** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint8x4_t, svuint8_t,
+ z0 = svmax_single_u8_x4 (z0, z15),
+ z0 = svmax (z0, z15))
+
+/*
+** max_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (max_single_z24_z24_z16, svuint8x4_t, svuint8_t, z24,
+ svmax_single_u8_x4 (z24, z16),
+ svmax (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat16x2_t, z0,
+ svmaxnm_f16_x2 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat16x2_t, z0,
+ svmaxnm_f16_x2 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.h - z29\.h}
+** |
+** fmaxnm [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat16x2_t, z0,
+ svmaxnm_f16_x2 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** fmaxnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat16x2_t, z18,
+ svmaxnm_f16_x2 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z18, svfloat16x2_t, z23,
+ svmaxnm_f16_x2 (z23, z18),
+ svmaxnm (z23, z18))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat16x2_t, z28,
+ svmaxnm_f16_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat16x2_t, z0,
+ svmaxnm_f16_x2 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** fmaxnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat16x2_t, z4,
+ svmaxnm_f16_x2 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** fmaxnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmaxnm_single_f16_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmaxnm_single_f16_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** fmaxnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18,
+ svmaxnm_single_f16_x2 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svmaxnm_single_f16_x2 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat16x2_t, svfloat16_t,
+ z0 = svmaxnm_single_f16_x2 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x2 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat16x4_t, z0,
+ svmaxnm_f16_x4 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat16x4_t, z0,
+ svmaxnm_f16_x4 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.h - z31\.h}
+** |
+** fmaxnm [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat16x4_t, z0,
+ svmaxnm_f16_x4 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat16x4_t, z18,
+ svmaxnm_f16_x4 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z28, svfloat16x4_t, z23,
+ svmaxnm_f16_x4 (z23, z28),
+ svmaxnm (z23, z28))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat16x4_t, z28,
+ svmaxnm_f16_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat16x4_t, z0,
+ svmaxnm_f16_x4 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fmaxnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat16x4_t, z4,
+ svmaxnm_f16_x4 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** fmaxnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmaxnm_single_f16_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmaxnm_single_f16_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18,
+ svmaxnm_single_f16_x4 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svmaxnm_single_f16_x4 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat16x4_t, svfloat16_t,
+ z0 = svmaxnm_single_f16_x4 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24,
+ svmaxnm_single_f16_x4 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat32x2_t, z0,
+ svmaxnm_f32_x2 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat32x2_t, z0,
+ svmaxnm_f32_x2 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.s - z29\.s}
+** |
+** fmaxnm [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat32x2_t, z0,
+ svmaxnm_f32_x2 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** fmaxnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat32x2_t, z18,
+ svmaxnm_f32_x2 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z18, svfloat32x2_t, z23,
+ svmaxnm_f32_x2 (z23, z18),
+ svmaxnm (z23, z18))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat32x2_t, z28,
+ svmaxnm_f32_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat32x2_t, z0,
+ svmaxnm_f32_x2 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** fmaxnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat32x2_t, z4,
+ svmaxnm_f32_x2 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** fmaxnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmaxnm_single_f32_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmaxnm_single_f32_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** fmaxnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18,
+ svmaxnm_single_f32_x2 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svmaxnm_single_f32_x2 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat32x2_t, svfloat32_t,
+ z0 = svmaxnm_single_f32_x2 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x2 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat32x4_t, z0,
+ svmaxnm_f32_x4 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat32x4_t, z0,
+ svmaxnm_f32_x4 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.s - z31\.s}
+** |
+** fmaxnm [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat32x4_t, z0,
+ svmaxnm_f32_x4 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat32x4_t, z18,
+ svmaxnm_f32_x4 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z28, svfloat32x4_t, z23,
+ svmaxnm_f32_x4 (z23, z28),
+ svmaxnm (z23, z28))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat32x4_t, z28,
+ svmaxnm_f32_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat32x4_t, z0,
+ svmaxnm_f32_x4 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fmaxnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat32x4_t, z4,
+ svmaxnm_f32_x4 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** fmaxnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmaxnm_single_f32_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmaxnm_single_f32_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18,
+ svmaxnm_single_f32_x4 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svmaxnm_single_f32_x4 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat32x4_t, svfloat32_t,
+ z0 = svmaxnm_single_f32_x4 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24,
+ svmaxnm_single_f32_x4 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat64x2_t, z0,
+ svmaxnm_f64_x2 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat64x2_t, z0,
+ svmaxnm_f64_x2 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.d - z29\.d}
+** |
+** fmaxnm [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat64x2_t, z0,
+ svmaxnm_f64_x2 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** fmaxnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat64x2_t, z18,
+ svmaxnm_f64_x2 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z18, svfloat64x2_t, z23,
+ svmaxnm_f64_x2 (z23, z18),
+ svmaxnm (z23, z18))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat64x2_t, z28,
+ svmaxnm_f64_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat64x2_t, z0,
+ svmaxnm_f64_x2 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** fmaxnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat64x2_t, z4,
+ svmaxnm_f64_x2 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** fmaxnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x2 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmaxnm_single_f64_x2 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmaxnm_single_f64_x2 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** fmaxnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18,
+ svmaxnm_single_f64_x2 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svmaxnm_single_f64_x2 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat64x2_t, svfloat64_t,
+ z0 = svmaxnm_single_f64_x2 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x2 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** maxnm_z0_z0_z4:
+** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z4, svfloat64x4_t, z0,
+ svmaxnm_f64_x4 (z0, z4),
+ svmaxnm (z0, z4))
+
+/*
+** maxnm_z0_z4_z0:
+** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z0, svfloat64x4_t, z0,
+ svmaxnm_f64_x4 (z4, z0),
+ svmaxnm (z4, z0))
+
+/*
+** maxnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.d - z31\.d}
+** |
+** fmaxnm [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z4_z28, svfloat64x4_t, z0,
+ svmaxnm_f64_x4 (z4, z28),
+ svmaxnm (z4, z28))
+
+/*
+** maxnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z18_z18_z4, svfloat64x4_t, z18,
+ svmaxnm_f64_x4 (z18, z4),
+ svmaxnm (z18, z4))
+
+/*
+** maxnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (maxnm_z23_z23_z28, svfloat64x4_t, z23,
+ svmaxnm_f64_x4 (z23, z28),
+ svmaxnm (z23, z28))
+
+/*
+** maxnm_z28_z28_z0:
+** fmaxnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (maxnm_z28_z28_z0, svfloat64x4_t, z28,
+ svmaxnm_f64_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z0_z0_z18, svfloat64x4_t, z0,
+ svmaxnm_f64_x4 (z0, z18),
+ svmaxnm (z0, z18))
+
+/*
+** maxnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fmaxnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (maxnm_z4_z4_z23, svfloat64x4_t, z4,
+ svmaxnm_f64_x4 (z4, z23),
+ svmaxnm (z4, z23))
+
+/*
+** maxnm_single_z24_z24_z0:
+** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** fmaxnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x4 (z28, z0),
+ svmaxnm (z28, z0))
+
+/*
+** maxnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z1_z24_z0:
+** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmaxnm_single_f64_x4 (z24, z0),
+ svmaxnm (z24, z0))
+
+/*
+** maxnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmaxnm_single_f64_x4 (z1, z0),
+ svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmaxnm [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18,
+ svmaxnm_single_f64_x4 (z18, z0),
+ svmaxnm (z18, z0))
+
+/*
+** maxnm_single_awkward:
+** ...
+** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svmaxnm_single_f64_x4 (z1, z0),
+ z0_res = svmaxnm (z1, z0))
+
+/*
+** maxnm_single_z0_z0_z15:
+** ...
+** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svmaxnm_single_f64_x4 (z0, z15),
+ z0 = svmaxnm (z0, z15))
+
+/*
+** maxnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svmaxnm_single_f64_x4 (z24, z16),
+ svmaxnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat16x2_t, z0,
+ svmin_f16_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat16x2_t, z0,
+ svmin_f16_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.h - z29\.h}
+** |
+** fmin [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat16x2_t, z0,
+ svmin_f16_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** fmin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat16x2_t, z18,
+ svmin_f16_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svfloat16x2_t, z23,
+ svmin_f16_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat16x2_t, z28,
+ svmin_f16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat16x2_t, z0,
+ svmin_f16_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** fmin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat16x2_t, z4,
+ svmin_f16_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmin_single_f16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** fmin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmin_single_f16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmin_single_f16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmin_single_f16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmin_single_f16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** fmin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18,
+ svmin_single_f16_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svmin_single_f16_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat16x2_t, svfloat16_t,
+ z0 = svmin_single_f16_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24,
+ svmin_single_f16_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat16x4_t, z0,
+ svmin_f16_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat16x4_t, z0,
+ svmin_f16_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.h - z31\.h}
+** |
+** fmin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat16x4_t, z0,
+ svmin_f16_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat16x4_t, z18,
+ svmin_f16_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svfloat16x4_t, z23,
+ svmin_f16_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat16x4_t, z28,
+ svmin_f16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat16x4_t, z0,
+ svmin_f16_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fmin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat16x4_t, z4,
+ svmin_f16_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmin_single_f16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** fmin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmin_single_f16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmin_single_f16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmin_single_f16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmin_single_f16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18,
+ svmin_single_f16_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svmin_single_f16_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat16x4_t, svfloat16_t,
+ z0 = svmin_single_f16_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24,
+ svmin_single_f16_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat32x2_t, z0,
+ svmin_f32_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat32x2_t, z0,
+ svmin_f32_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.s - z29\.s}
+** |
+** fmin [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat32x2_t, z0,
+ svmin_f32_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** fmin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat32x2_t, z18,
+ svmin_f32_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svfloat32x2_t, z23,
+ svmin_f32_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat32x2_t, z28,
+ svmin_f32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat32x2_t, z0,
+ svmin_f32_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** fmin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat32x2_t, z4,
+ svmin_f32_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmin_single_f32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** fmin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmin_single_f32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmin_single_f32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmin_single_f32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmin_single_f32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** fmin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18,
+ svmin_single_f32_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svmin_single_f32_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat32x2_t, svfloat32_t,
+ z0 = svmin_single_f32_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24,
+ svmin_single_f32_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat32x4_t, z0,
+ svmin_f32_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat32x4_t, z0,
+ svmin_f32_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.s - z31\.s}
+** |
+** fmin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat32x4_t, z0,
+ svmin_f32_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat32x4_t, z18,
+ svmin_f32_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svfloat32x4_t, z23,
+ svmin_f32_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat32x4_t, z28,
+ svmin_f32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat32x4_t, z0,
+ svmin_f32_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fmin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat32x4_t, z4,
+ svmin_f32_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmin_single_f32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** fmin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmin_single_f32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmin_single_f32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmin_single_f32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmin_single_f32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18,
+ svmin_single_f32_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svmin_single_f32_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat32x4_t, svfloat32_t,
+ z0 = svmin_single_f32_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24,
+ svmin_single_f32_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat64x2_t, z0,
+ svmin_f64_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat64x2_t, z0,
+ svmin_f64_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.d - z29\.d}
+** |
+** fmin [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat64x2_t, z0,
+ svmin_f64_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** fmin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat64x2_t, z18,
+ svmin_f64_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svfloat64x2_t, z23,
+ svmin_f64_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat64x2_t, z28,
+ svmin_f64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat64x2_t, z0,
+ svmin_f64_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** fmin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat64x2_t, z4,
+ svmin_f64_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmin_single_f64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** fmin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmin_single_f64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmin_single_f64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmin_single_f64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmin_single_f64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** fmin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18,
+ svmin_single_f64_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svmin_single_f64_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat64x2_t, svfloat64_t,
+ z0 = svmin_single_f64_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24,
+ svmin_single_f64_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svfloat64x4_t, z0,
+ svmin_f64_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svfloat64x4_t, z0,
+ svmin_f64_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.d - z31\.d}
+** |
+** fmin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svfloat64x4_t, z0,
+ svmin_f64_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svfloat64x4_t, z18,
+ svmin_f64_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svfloat64x4_t, z23,
+ svmin_f64_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** fmin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svfloat64x4_t, z28,
+ svmin_f64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svfloat64x4_t, z0,
+ svmin_f64_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fmin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svfloat64x4_t, z4,
+ svmin_f64_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmin_single_f64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** fmin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmin_single_f64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmin_single_f64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmin_single_f64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmin_single_f64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmin [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18,
+ svmin_single_f64_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svmin_single_f64_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svmin_single_f64_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svmin_single_f64_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint16x2_t, z0,
+ svmin_s16_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint16x2_t, z0,
+ svmin_s16_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.h - z29\.h}
+** |
+** smin [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint16x2_t, z0,
+ svmin_s16_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** smin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint16x2_t, z18,
+ svmin_s16_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svint16x2_t, z23,
+ svmin_s16_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint16x2_t, z28,
+ svmin_s16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint16x2_t, z0,
+ svmin_s16_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** smin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint16x2_t, z4,
+ svmin_s16_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint16x2_t, svint16_t, z24,
+ svmin_single_s16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** smin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint16x2_t, svint16_t, z24,
+ svmin_single_s16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint16x2_t, svint16_t, z24,
+ svmin_single_s16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint16x2_t, svint16_t, z1,
+ svmin_single_s16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint16x2_t, svint16_t, z1,
+ svmin_single_s16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** smin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint16x2_t, svint16_t, z18,
+ svmin_single_s16_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint16x2_t, svint16_t,
+ z0_res = svmin_single_s16_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint16x2_t, svint16_t,
+ z0 = svmin_single_s16_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint16x2_t, svint16_t, z24,
+ svmin_single_s16_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint16x4_t, z0,
+ svmin_s16_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint16x4_t, z0,
+ svmin_s16_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.h - z31\.h}
+** |
+** smin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint16x4_t, z0,
+ svmin_s16_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint16x4_t, z18,
+ svmin_s16_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svint16x4_t, z23,
+ svmin_s16_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint16x4_t, z28,
+ svmin_s16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint16x4_t, z0,
+ svmin_s16_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** smin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint16x4_t, z4,
+ svmin_s16_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint16x4_t, svint16_t, z24,
+ svmin_single_s16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** smin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint16x4_t, svint16_t, z24,
+ svmin_single_s16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint16x4_t, svint16_t, z24,
+ svmin_single_s16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint16x4_t, svint16_t, z1,
+ svmin_single_s16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint16x4_t, svint16_t, z1,
+ svmin_single_s16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint16x4_t, svint16_t, z18,
+ svmin_single_s16_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint16x4_t, svint16_t,
+ z0_res = svmin_single_s16_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint16x4_t, svint16_t,
+ z0 = svmin_single_s16_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint16x4_t, svint16_t, z24,
+ svmin_single_s16_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint32x2_t, z0,
+ svmin_s32_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint32x2_t, z0,
+ svmin_s32_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.s - z29\.s}
+** |
+** smin [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint32x2_t, z0,
+ svmin_s32_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** smin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint32x2_t, z18,
+ svmin_s32_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svint32x2_t, z23,
+ svmin_s32_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint32x2_t, z28,
+ svmin_s32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint32x2_t, z0,
+ svmin_s32_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** smin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint32x2_t, z4,
+ svmin_s32_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint32x2_t, svint32_t, z24,
+ svmin_single_s32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** smin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint32x2_t, svint32_t, z24,
+ svmin_single_s32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint32x2_t, svint32_t, z24,
+ svmin_single_s32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint32x2_t, svint32_t, z1,
+ svmin_single_s32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint32x2_t, svint32_t, z1,
+ svmin_single_s32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** smin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint32x2_t, svint32_t, z18,
+ svmin_single_s32_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint32x2_t, svint32_t,
+ z0_res = svmin_single_s32_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint32x2_t, svint32_t,
+ z0 = svmin_single_s32_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint32x2_t, svint32_t, z24,
+ svmin_single_s32_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint32x4_t, z0,
+ svmin_s32_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint32x4_t, z0,
+ svmin_s32_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.s - z31\.s}
+** |
+** smin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint32x4_t, z0,
+ svmin_s32_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint32x4_t, z18,
+ svmin_s32_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svint32x4_t, z23,
+ svmin_s32_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint32x4_t, z28,
+ svmin_s32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint32x4_t, z0,
+ svmin_s32_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** smin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint32x4_t, z4,
+ svmin_s32_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint32x4_t, svint32_t, z24,
+ svmin_single_s32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** smin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint32x4_t, svint32_t, z24,
+ svmin_single_s32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint32x4_t, svint32_t, z24,
+ svmin_single_s32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint32x4_t, svint32_t, z1,
+ svmin_single_s32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint32x4_t, svint32_t, z1,
+ svmin_single_s32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint32x4_t, svint32_t, z18,
+ svmin_single_s32_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint32x4_t, svint32_t,
+ z0_res = svmin_single_s32_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint32x4_t, svint32_t,
+ z0 = svmin_single_s32_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint32x4_t, svint32_t, z24,
+ svmin_single_s32_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint64x2_t, z0,
+ svmin_s64_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint64x2_t, z0,
+ svmin_s64_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.d - z29\.d}
+** |
+** smin [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint64x2_t, z0,
+ svmin_s64_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** smin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint64x2_t, z18,
+ svmin_s64_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svint64x2_t, z23,
+ svmin_s64_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint64x2_t, z28,
+ svmin_s64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint64x2_t, z0,
+ svmin_s64_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** smin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint64x2_t, z4,
+ svmin_s64_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint64x2_t, svint64_t, z24,
+ svmin_single_s64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** smin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint64x2_t, svint64_t, z24,
+ svmin_single_s64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint64x2_t, svint64_t, z24,
+ svmin_single_s64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint64x2_t, svint64_t, z1,
+ svmin_single_s64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint64x2_t, svint64_t, z1,
+ svmin_single_s64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** smin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint64x2_t, svint64_t, z18,
+ svmin_single_s64_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint64x2_t, svint64_t,
+ z0_res = svmin_single_s64_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint64x2_t, svint64_t,
+ z0 = svmin_single_s64_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint64x2_t, svint64_t, z24,
+ svmin_single_s64_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint64x4_t, z0,
+ svmin_s64_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint64x4_t, z0,
+ svmin_s64_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.d - z31\.d}
+** |
+** smin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint64x4_t, z0,
+ svmin_s64_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint64x4_t, z18,
+ svmin_s64_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svint64x4_t, z23,
+ svmin_s64_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint64x4_t, z28,
+ svmin_s64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint64x4_t, z0,
+ svmin_s64_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** smin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint64x4_t, z4,
+ svmin_s64_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint64x4_t, svint64_t, z24,
+ svmin_single_s64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** smin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint64x4_t, svint64_t, z24,
+ svmin_single_s64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint64x4_t, svint64_t, z24,
+ svmin_single_s64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint64x4_t, svint64_t, z1,
+ svmin_single_s64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint64x4_t, svint64_t, z1,
+ svmin_single_s64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint64x4_t, svint64_t, z18,
+ svmin_single_s64_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint64x4_t, svint64_t,
+ z0_res = svmin_single_s64_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint64x4_t, svint64_t,
+ z0 = svmin_single_s64_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint64x4_t, svint64_t, z24,
+ svmin_single_s64_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint8x2_t, z0,
+ svmin_s8_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint8x2_t, z0,
+ svmin_s8_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.b - z29\.b}
+** |
+** smin [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint8x2_t, z0,
+ svmin_s8_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** smin {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint8x2_t, z18,
+ svmin_s8_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svint8x2_t, z23,
+ svmin_s8_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint8x2_t, z28,
+ svmin_s8_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint8x2_t, z0,
+ svmin_s8_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** smin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint8x2_t, z4,
+ svmin_s8_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint8x2_t, svint8_t, z24,
+ svmin_single_s8_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** smin {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint8x2_t, svint8_t, z24,
+ svmin_single_s8_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint8x2_t, svint8_t, z24,
+ svmin_single_s8_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint8x2_t, svint8_t, z1,
+ svmin_single_s8_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint8x2_t, svint8_t, z1,
+ svmin_single_s8_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** smin {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint8x2_t, svint8_t, z18,
+ svmin_single_s8_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint8x2_t, svint8_t,
+ z0_res = svmin_single_s8_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint8x2_t, svint8_t,
+ z0 = svmin_single_s8_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint8x2_t, svint8_t, z24,
+ svmin_single_s8_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svint8x4_t, z0,
+ svmin_s8_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svint8x4_t, z0,
+ svmin_s8_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.b - z31\.b}
+** |
+** smin [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svint8x4_t, z0,
+ svmin_s8_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svint8x4_t, z18,
+ svmin_s8_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svint8x4_t, z23,
+ svmin_s8_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** smin {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svint8x4_t, z28,
+ svmin_s8_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svint8x4_t, z0,
+ svmin_s8_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** smin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svint8x4_t, z4,
+ svmin_s8_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svint8x4_t, svint8_t, z24,
+ svmin_single_s8_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** smin {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svint8x4_t, svint8_t, z24,
+ svmin_single_s8_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svint8x4_t, svint8_t, z24,
+ svmin_single_s8_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svint8x4_t, svint8_t, z1,
+ svmin_single_s8_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svint8x4_t, svint8_t, z1,
+ svmin_single_s8_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smin [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svint8x4_t, svint8_t, z18,
+ svmin_single_s8_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint8x4_t, svint8_t,
+ z0_res = svmin_single_s8_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint8x4_t, svint8_t,
+ z0 = svmin_single_s8_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svint8x4_t, svint8_t, z24,
+ svmin_single_s8_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint16x2_t, z0,
+ svmin_u16_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint16x2_t, z0,
+ svmin_u16_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.h - z29\.h}
+** |
+** umin [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint16x2_t, z0,
+ svmin_u16_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** umin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint16x2_t, z18,
+ svmin_u16_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svuint16x2_t, z23,
+ svmin_u16_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint16x2_t, z28,
+ svmin_u16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint16x2_t, z0,
+ svmin_u16_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** umin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint16x2_t, z4,
+ svmin_u16_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint16x2_t, svuint16_t, z24,
+ svmin_single_u16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** umin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint16x2_t, svuint16_t, z24,
+ svmin_single_u16_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint16x2_t, svuint16_t, z24,
+ svmin_single_u16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint16x2_t, svuint16_t, z1,
+ svmin_single_u16_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint16x2_t, svuint16_t, z1,
+ svmin_single_u16_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** umin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint16x2_t, svuint16_t, z18,
+ svmin_single_u16_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint16x2_t, svuint16_t,
+ z0_res = svmin_single_u16_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint16x2_t, svuint16_t,
+ z0 = svmin_single_u16_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint16x2_t, svuint16_t, z24,
+ svmin_single_u16_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint16x4_t, z0,
+ svmin_u16_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint16x4_t, z0,
+ svmin_u16_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.h - z31\.h}
+** |
+** umin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint16x4_t, z0,
+ svmin_u16_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint16x4_t, z18,
+ svmin_u16_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svuint16x4_t, z23,
+ svmin_u16_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint16x4_t, z28,
+ svmin_u16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint16x4_t, z0,
+ svmin_u16_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** umin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint16x4_t, z4,
+ svmin_u16_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint16x4_t, svuint16_t, z24,
+ svmin_single_u16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** umin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint16x4_t, svuint16_t, z24,
+ svmin_single_u16_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint16x4_t, svuint16_t, z24,
+ svmin_single_u16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint16x4_t, svuint16_t, z1,
+ svmin_single_u16_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint16x4_t, svuint16_t, z1,
+ svmin_single_u16_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint16x4_t, svuint16_t, z18,
+ svmin_single_u16_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint16x4_t, svuint16_t,
+ z0_res = svmin_single_u16_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint16x4_t, svuint16_t,
+ z0 = svmin_single_u16_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint16x4_t, svuint16_t, z24,
+ svmin_single_u16_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint32x2_t, z0,
+ svmin_u32_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint32x2_t, z0,
+ svmin_u32_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.s - z29\.s}
+** |
+** umin [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint32x2_t, z0,
+ svmin_u32_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** umin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint32x2_t, z18,
+ svmin_u32_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svuint32x2_t, z23,
+ svmin_u32_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint32x2_t, z28,
+ svmin_u32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint32x2_t, z0,
+ svmin_u32_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** umin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint32x2_t, z4,
+ svmin_u32_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint32x2_t, svuint32_t, z24,
+ svmin_single_u32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** umin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint32x2_t, svuint32_t, z24,
+ svmin_single_u32_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint32x2_t, svuint32_t, z24,
+ svmin_single_u32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint32x2_t, svuint32_t, z1,
+ svmin_single_u32_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint32x2_t, svuint32_t, z1,
+ svmin_single_u32_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** umin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint32x2_t, svuint32_t, z18,
+ svmin_single_u32_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint32x2_t, svuint32_t,
+ z0_res = svmin_single_u32_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint32x2_t, svuint32_t,
+ z0 = svmin_single_u32_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint32x2_t, svuint32_t, z24,
+ svmin_single_u32_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint32x4_t, z0,
+ svmin_u32_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint32x4_t, z0,
+ svmin_u32_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.s - z31\.s}
+** |
+** umin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint32x4_t, z0,
+ svmin_u32_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint32x4_t, z18,
+ svmin_u32_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svuint32x4_t, z23,
+ svmin_u32_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint32x4_t, z28,
+ svmin_u32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint32x4_t, z0,
+ svmin_u32_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** umin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint32x4_t, z4,
+ svmin_u32_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint32x4_t, svuint32_t, z24,
+ svmin_single_u32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** umin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint32x4_t, svuint32_t, z24,
+ svmin_single_u32_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint32x4_t, svuint32_t, z24,
+ svmin_single_u32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint32x4_t, svuint32_t, z1,
+ svmin_single_u32_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint32x4_t, svuint32_t, z1,
+ svmin_single_u32_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint32x4_t, svuint32_t, z18,
+ svmin_single_u32_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint32x4_t, svuint32_t,
+ z0_res = svmin_single_u32_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint32x4_t, svuint32_t,
+ z0 = svmin_single_u32_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint32x4_t, svuint32_t, z24,
+ svmin_single_u32_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint64x2_t, z0,
+ svmin_u64_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint64x2_t, z0,
+ svmin_u64_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.d - z29\.d}
+** |
+** umin [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint64x2_t, z0,
+ svmin_u64_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** umin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint64x2_t, z18,
+ svmin_u64_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svuint64x2_t, z23,
+ svmin_u64_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint64x2_t, z28,
+ svmin_u64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint64x2_t, z0,
+ svmin_u64_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** umin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint64x2_t, z4,
+ svmin_u64_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint64x2_t, svuint64_t, z24,
+ svmin_single_u64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** umin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint64x2_t, svuint64_t, z24,
+ svmin_single_u64_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint64x2_t, svuint64_t, z24,
+ svmin_single_u64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint64x2_t, svuint64_t, z1,
+ svmin_single_u64_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint64x2_t, svuint64_t, z1,
+ svmin_single_u64_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** umin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint64x2_t, svuint64_t, z18,
+ svmin_single_u64_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint64x2_t, svuint64_t,
+ z0_res = svmin_single_u64_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint64x2_t, svuint64_t,
+ z0 = svmin_single_u64_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint64x2_t, svuint64_t, z24,
+ svmin_single_u64_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint64x4_t, z0,
+ svmin_u64_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint64x4_t, z0,
+ svmin_u64_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.d - z31\.d}
+** |
+** umin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint64x4_t, z0,
+ svmin_u64_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint64x4_t, z18,
+ svmin_u64_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svuint64x4_t, z23,
+ svmin_u64_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint64x4_t, z28,
+ svmin_u64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint64x4_t, z0,
+ svmin_u64_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** umin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint64x4_t, z4,
+ svmin_u64_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint64x4_t, svuint64_t, z24,
+ svmin_single_u64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** umin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint64x4_t, svuint64_t, z24,
+ svmin_single_u64_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint64x4_t, svuint64_t, z24,
+ svmin_single_u64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint64x4_t, svuint64_t, z1,
+ svmin_single_u64_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint64x4_t, svuint64_t, z1,
+ svmin_single_u64_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint64x4_t, svuint64_t, z18,
+ svmin_single_u64_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint64x4_t, svuint64_t,
+ z0_res = svmin_single_u64_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint64x4_t, svuint64_t,
+ z0 = svmin_single_u64_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint64x4_t, svuint64_t, z24,
+ svmin_single_u64_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint8x2_t, z0,
+ svmin_u8_x2 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint8x2_t, z0,
+ svmin_u8_x2 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.b - z29\.b}
+** |
+** umin [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint8x2_t, z0,
+ svmin_u8_x2 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** umin {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint8x2_t, z18,
+ svmin_u8_x2 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z18, svuint8x2_t, z23,
+ svmin_u8_x2 (z23, z18),
+ svmin (z23, z18))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint8x2_t, z28,
+ svmin_u8_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint8x2_t, z0,
+ svmin_u8_x2 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** umin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint8x2_t, z4,
+ svmin_u8_x2 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint8x2_t, svuint8_t, z24,
+ svmin_single_u8_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** umin {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint8x2_t, svuint8_t, z24,
+ svmin_single_u8_x2 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint8x2_t, svuint8_t, z24,
+ svmin_single_u8_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint8x2_t, svuint8_t, z1,
+ svmin_single_u8_x2 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint8x2_t, svuint8_t, z1,
+ svmin_single_u8_x2 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** umin {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint8x2_t, svuint8_t, z18,
+ svmin_single_u8_x2 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint8x2_t, svuint8_t,
+ z0_res = svmin_single_u8_x2 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint8x2_t, svuint8_t,
+ z0 = svmin_single_u8_x2 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint8x2_t, svuint8_t, z24,
+ svmin_single_u8_x2 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** min_z0_z0_z4:
+** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (min_z0_z0_z4, svuint8x4_t, z0,
+ svmin_u8_x4 (z0, z4),
+ svmin (z0, z4))
+
+/*
+** min_z0_z4_z0:
+** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (min_z0_z4_z0, svuint8x4_t, z0,
+ svmin_u8_x4 (z4, z0),
+ svmin (z4, z0))
+
+/*
+** min_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.b - z31\.b}
+** |
+** umin [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z4_z28, svuint8x4_t, z0,
+ svmin_u8_x4 (z4, z28),
+ svmin (z4, z28))
+
+/*
+** min_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z18_z18_z4, svuint8x4_t, z18,
+ svmin_u8_x4 (z18, z4),
+ svmin (z18, z4))
+
+/*
+** min_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (min_z23_z23_z28, svuint8x4_t, z23,
+ svmin_u8_x4 (z23, z28),
+ svmin (z23, z28))
+
+/*
+** min_z28_z28_z0:
+** umin {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (min_z28_z28_z0, svuint8x4_t, z28,
+ svmin_u8_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z0_z0_z18, svuint8x4_t, z0,
+ svmin_u8_x4 (z0, z18),
+ svmin (z0, z18))
+
+/*
+** min_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** umin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (min_z4_z4_z23, svuint8x4_t, z4,
+ svmin_u8_x4 (z4, z23),
+ svmin (z4, z23))
+
+/*
+** min_single_z24_z24_z0:
+** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z0, svuint8x4_t, svuint8_t, z24,
+ svmin_single_u8_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** umin {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z28_z0, svuint8x4_t, svuint8_t, z24,
+ svmin_single_u8_x4 (z28, z0),
+ svmin (z28, z0))
+
+/*
+** min_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z1_z0, svuint8x4_t, svuint8_t, z24,
+ svmin_single_u8_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z1_z24_z0:
+** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z24_z0, svuint8x4_t, svuint8_t, z1,
+ svmin_single_u8_x4 (z24, z0),
+ svmin (z24, z0))
+
+/*
+** min_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z1_z1_z0, svuint8x4_t, svuint8_t, z1,
+ svmin_single_u8_x4 (z1, z0),
+ svmin (z1, z0))
+
+/*
+** min_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umin [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (min_single_z18_z18_z0, svuint8x4_t, svuint8_t, z18,
+ svmin_single_u8_x4 (z18, z0),
+ svmin (z18, z0))
+
+/*
+** min_single_awkward:
+** ...
+** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint8x4_t, svuint8_t,
+ z0_res = svmin_single_u8_x4 (z1, z0),
+ z0_res = svmin (z1, z0))
+
+/*
+** min_single_z0_z0_z15:
+** ...
+** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint8x4_t, svuint8_t,
+ z0 = svmin_single_u8_x4 (z0, z15),
+ z0 = svmin (z0, z15))
+
+/*
+** min_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (min_single_z24_z24_z16, svuint8x4_t, svuint8_t, z24,
+ svmin_single_u8_x4 (z24, z16),
+ svmin (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat16x2_t, z0,
+ svminnm_f16_x2 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat16x2_t, z0,
+ svminnm_f16_x2 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.h - z29\.h}
+** |
+** fminnm [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat16x2_t, z0,
+ svminnm_f16_x2 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** fminnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat16x2_t, z18,
+ svminnm_f16_x2 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z18, svfloat16x2_t, z23,
+ svminnm_f16_x2 (z23, z18),
+ svminnm (z23, z18))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat16x2_t, z28,
+ svminnm_f16_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat16x2_t, z0,
+ svminnm_f16_x2 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** fminnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat16x2_t, z4,
+ svminnm_f16_x2 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24,
+ svminnm_single_f16_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** fminnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24,
+ svminnm_single_f16_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24,
+ svminnm_single_f16_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1,
+ svminnm_single_f16_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1,
+ svminnm_single_f16_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** fminnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18,
+ svminnm_single_f16_x2 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svminnm_single_f16_x2 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat16x2_t, svfloat16_t,
+ z0 = svminnm_single_f16_x2 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24,
+ svminnm_single_f16_x2 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat16x4_t, z0,
+ svminnm_f16_x4 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat16x4_t, z0,
+ svminnm_f16_x4 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.h - z31\.h}
+** |
+** fminnm [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat16x4_t, z0,
+ svminnm_f16_x4 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat16x4_t, z18,
+ svminnm_f16_x4 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z28, svfloat16x4_t, z23,
+ svminnm_f16_x4 (z23, z28),
+ svminnm (z23, z28))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat16x4_t, z28,
+ svminnm_f16_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat16x4_t, z0,
+ svminnm_f16_x4 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fminnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat16x4_t, z4,
+ svminnm_f16_x4 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24,
+ svminnm_single_f16_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** fminnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24,
+ svminnm_single_f16_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24,
+ svminnm_single_f16_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1,
+ svminnm_single_f16_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1,
+ svminnm_single_f16_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18,
+ svminnm_single_f16_x4 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svminnm_single_f16_x4 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat16x4_t, svfloat16_t,
+ z0 = svminnm_single_f16_x4 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24,
+ svminnm_single_f16_x4 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat32x2_t, z0,
+ svminnm_f32_x2 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat32x2_t, z0,
+ svminnm_f32_x2 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.s - z29\.s}
+** |
+** fminnm [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat32x2_t, z0,
+ svminnm_f32_x2 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** fminnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat32x2_t, z18,
+ svminnm_f32_x2 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z18, svfloat32x2_t, z23,
+ svminnm_f32_x2 (z23, z18),
+ svminnm (z23, z18))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat32x2_t, z28,
+ svminnm_f32_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat32x2_t, z0,
+ svminnm_f32_x2 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** fminnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat32x2_t, z4,
+ svminnm_f32_x2 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24,
+ svminnm_single_f32_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** fminnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24,
+ svminnm_single_f32_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24,
+ svminnm_single_f32_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1,
+ svminnm_single_f32_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1,
+ svminnm_single_f32_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** fminnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18,
+ svminnm_single_f32_x2 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svminnm_single_f32_x2 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat32x2_t, svfloat32_t,
+ z0 = svminnm_single_f32_x2 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24,
+ svminnm_single_f32_x2 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat32x4_t, z0,
+ svminnm_f32_x4 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat32x4_t, z0,
+ svminnm_f32_x4 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.s - z31\.s}
+** |
+** fminnm [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat32x4_t, z0,
+ svminnm_f32_x4 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat32x4_t, z18,
+ svminnm_f32_x4 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z28, svfloat32x4_t, z23,
+ svminnm_f32_x4 (z23, z28),
+ svminnm (z23, z28))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat32x4_t, z28,
+ svminnm_f32_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat32x4_t, z0,
+ svminnm_f32_x4 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fminnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat32x4_t, z4,
+ svminnm_f32_x4 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24,
+ svminnm_single_f32_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** fminnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24,
+ svminnm_single_f32_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24,
+ svminnm_single_f32_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1,
+ svminnm_single_f32_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1,
+ svminnm_single_f32_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18,
+ svminnm_single_f32_x4 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svminnm_single_f32_x4 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat32x4_t, svfloat32_t,
+ z0 = svminnm_single_f32_x4 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24,
+ svminnm_single_f32_x4 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat64x2_t, z0,
+ svminnm_f64_x2 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat64x2_t, z0,
+ svminnm_f64_x2 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.d - z29\.d}
+** |
+** fminnm [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat64x2_t, z0,
+ svminnm_f64_x2 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** fminnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat64x2_t, z18,
+ svminnm_f64_x2 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z18, svfloat64x2_t, z23,
+ svminnm_f64_x2 (z23, z18),
+ svminnm (z23, z18))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat64x2_t, z28,
+ svminnm_f64_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat64x2_t, z0,
+ svminnm_f64_x2 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** fminnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat64x2_t, z4,
+ svminnm_f64_x2 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24,
+ svminnm_single_f64_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** fminnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24,
+ svminnm_single_f64_x2 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24,
+ svminnm_single_f64_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1,
+ svminnm_single_f64_x2 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1,
+ svminnm_single_f64_x2 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** fminnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18,
+ svminnm_single_f64_x2 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svminnm_single_f64_x2 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat64x2_t, svfloat64_t,
+ z0 = svminnm_single_f64_x2 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24,
+ svminnm_single_f64_x2 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** minnm_z0_z0_z4:
+** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (minnm_z0_z0_z4, svfloat64x4_t, z0,
+ svminnm_f64_x4 (z0, z4),
+ svminnm (z0, z4))
+
+/*
+** minnm_z0_z4_z0:
+** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (minnm_z0_z4_z0, svfloat64x4_t, z0,
+ svminnm_f64_x4 (z4, z0),
+ svminnm (z4, z0))
+
+/*
+** minnm_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.d - z31\.d}
+** |
+** fminnm [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z4_z28, svfloat64x4_t, z0,
+ svminnm_f64_x4 (z4, z28),
+ svminnm (z4, z28))
+
+/*
+** minnm_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z18_z18_z4, svfloat64x4_t, z18,
+ svminnm_f64_x4 (z18, z4),
+ svminnm (z18, z4))
+
+/*
+** minnm_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (minnm_z23_z23_z28, svfloat64x4_t, z23,
+ svminnm_f64_x4 (z23, z28),
+ svminnm (z23, z28))
+
+/*
+** minnm_z28_z28_z0:
+** fminnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (minnm_z28_z28_z0, svfloat64x4_t, z28,
+ svminnm_f64_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z0_z0_z18, svfloat64x4_t, z0,
+ svminnm_f64_x4 (z0, z18),
+ svminnm (z0, z18))
+
+/*
+** minnm_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fminnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (minnm_z4_z4_z23, svfloat64x4_t, z4,
+ svminnm_f64_x4 (z4, z23),
+ svminnm (z4, z23))
+
+/*
+** minnm_single_z24_z24_z0:
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24,
+ svminnm_single_f64_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** fminnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24,
+ svminnm_single_f64_x4 (z28, z0),
+ svminnm (z28, z0))
+
+/*
+** minnm_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24,
+ svminnm_single_f64_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z1_z24_z0:
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1,
+ svminnm_single_f64_x4 (z24, z0),
+ svminnm (z24, z0))
+
+/*
+** minnm_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1,
+ svminnm_single_f64_x4 (z1, z0),
+ svminnm (z1, z0))
+
+/*
+** minnm_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fminnm [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18,
+ svminnm_single_f64_x4 (z18, z0),
+ svminnm (z18, z0))
+
+/*
+** minnm_single_awkward:
+** ...
+** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svminnm_single_f64_x4 (z1, z0),
+ z0_res = svminnm (z1, z0))
+
+/*
+** minnm_single_z0_z0_z15:
+** ...
+** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svminnm_single_f64_x4 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svminnm_single_f64_x4 (z24, z16),
+ svminnm (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** bfmlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** bfmlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p6_z23_z0_7:
+** bfmlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p10_z23_z0_1:
+** bfmlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** bfmlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w12, z0, z3, 6),
+ svmla_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** bfmlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** bfmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x2 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** bfmlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** fmlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** fmlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p6_z23_z0_7:
+** fmlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p10_z23_z0_1:
+** fmlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** fmlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w12, z0, z3, 6),
+ svmla_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** fmlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** fmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x2 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** fmlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** fmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t,
+ svmla_lane_za32_f16_vg2x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z7\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmla za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, z4\.s\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** fmla za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}, z4\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8 + 8, z0, z4, 0),
+ svmla_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** mla_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w0 - 1, z0, z4, 1),
+ svmla_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** mla_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, z15\.s\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8, z4, z15, 2),
+ svmla_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7])\.d, z16\.d
+** fmla za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, \1\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8, z28, z16, 3),
+ svmla_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.s\[w8, 0, vgx2\], [^\n]+, z7\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8, z17, z7, 0),
+ svmla_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** fmla za\.s\[w8, 0, vgx2\], {z22\.s - z23\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat32x2_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x2 (w8, z22, z4, 1),
+ svmla_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z7\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, z4\.s\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** fmla za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}, z4\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8 + 8, z0, z4, 0),
+ svmla_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** mla_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w0 - 1, z0, z4, 1),
+ svmla_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** mla_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmla za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}, z15\.s\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z4, z15, 2),
+ svmla_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7])\.d, z16\.d
+** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, \1\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z28, z16, 3),
+ svmla_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.s\[w8, 0, vgx4\], [^\n]+, z7\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.s\[w8, 0, vgx4\], [^\n]+, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlal za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** smlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p6_z23_z0_7:
+** smlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p10_z23_z0_1:
+** smlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** smlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w12, z0, z3, 6),
+ svmla_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** smlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** smlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** smlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** smlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svint16x2_t, svint16_t,
+ svmla_lane_za32_s16_vg2x2 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** smlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** smlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svint16x4_t, svint16_t,
+ svmla_lane_za32_s16_vg2x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlall za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** smlall za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmla_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mla_lane_w11p12_z23_z0_9:
+** smlall za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_9, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmla_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mla_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmla_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mla_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmla_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mla_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmla_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mla_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmla_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mla_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_15, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w12, z0, z3, 15),
+ svmla_lane_za32_vg4x1 (w12, z0, z3, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg4x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** smlall za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w11 + 4, z0, z4, 3),
+ svmla_lane_za32_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8 + 6, z0, z4, 4),
+ svmla_lane_za32_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8 + 7, z0, z4, 5),
+ svmla_lane_za32_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_7:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8 + 8, z0, z4, 7),
+ svmla_lane_za32_vg4x2 (w8 + 8, z0, z4, 7))
+
+/*
+** mla_lane_w0m1_z0_z4_9:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w0 - 1, z0, z4, 9),
+ svmla_lane_za32_vg4x2 (w0 - 1, z0, z4, 9))
+
+/*
+** mla_lane_w8_z4_z15_10:
+** str d15, \[sp, #?-16\]!
+** smlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8, z4, z15, 10),
+ svmla_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mla_lane_w8_z28_z16_11:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8, z28, z16, 11),
+ svmla_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mla_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8, z17, z7, 13),
+ svmla_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mla_lane_w8_z22_z4_15:
+** smlall za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svint8x2_t, svint8_t,
+ svmla_lane_za32_s8_vg4x2 (w8, z22, z4, 15),
+ svmla_lane_za32_vg4x2 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_7:
+** smlall za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_7, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w11 + 4, z0, z4, 7),
+ svmla_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mla_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8 + 6, z0, z4, 8),
+ svmla_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mla_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8 + 7, z0, z4, 9),
+ svmla_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mla_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8 + 8, z0, z4, 10),
+ svmla_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mla_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w0 - 1, z0, z4, 11),
+ svmla_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mla_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** smlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z4, z15, 12),
+ svmla_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mla_lane_w8_z28_z16_13:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z28, z16, 13),
+ svmla_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mla_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z17, z7, 14),
+ svmla_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mla_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z22, z4, 15),
+ svmla_lane_za32_vg4x4 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlal za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** umlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p6_z23_z0_7:
+** umlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p10_z23_z0_1:
+** umlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** umlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w12, z0, z3, 6),
+ svmla_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** umlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** umlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** umlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x2 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p6_z0_z4_7:
+** umlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+ svmla_lane_za32_u16_vg2x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlall za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** umlall za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmla_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mla_lane_w11p12_z23_z0_9:
+** umlall za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_9, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmla_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mla_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmla_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mla_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmla_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mla_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmla_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mla_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmla_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mla_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_15, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w12, z0, z3, 15),
+ svmla_lane_za32_vg4x1 (w12, z0, z3, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (0, z0, z4, 0),
+ svmla_lane_za32_vg4x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w0, z0, z7, 1),
+ svmla_lane_za32_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8, z28, z4, 2),
+ svmla_lane_za32_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** umlall za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w11 + 4, z0, z4, 3),
+ svmla_lane_za32_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8 + 6, z0, z4, 4),
+ svmla_lane_za32_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8 + 7, z0, z4, 5),
+ svmla_lane_za32_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_7:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8 + 8, z0, z4, 7),
+ svmla_lane_za32_vg4x2 (w8 + 8, z0, z4, 7))
+
+/*
+** mla_lane_w0m1_z0_z4_9:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w0 - 1, z0, z4, 9),
+ svmla_lane_za32_vg4x2 (w0 - 1, z0, z4, 9))
+
+/*
+** mla_lane_w8_z4_z15_10:
+** str d15, \[sp, #?-16\]!
+** umlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8, z4, z15, 10),
+ svmla_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mla_lane_w8_z28_z16_11:
+** mov (z[0-7]).d, z16.d
+** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8, z28, z16, 11),
+ svmla_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mla_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8, z17, z7, 13),
+ svmla_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mla_lane_w8_z22_z4_15:
+** umlall za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svuint8x2_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x2 (w8, z22, z4, 15),
+ svmla_lane_za32_vg4x2 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (0, z0, z4, 0),
+ svmla_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w0, z0, z7, 1),
+ svmla_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8, z28, z4, 2),
+ svmla_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_7:
+** umlall za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_7, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w11 + 4, z0, z4, 7),
+ svmla_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mla_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8 + 6, z0, z4, 8),
+ svmla_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mla_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8 + 7, z0, z4, 9),
+ svmla_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mla_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8 + 8, z0, z4, 10),
+ svmla_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mla_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w0 - 1, z0, z4, 11),
+ svmla_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mla_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** umlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8, z4, z15, 12),
+ svmla_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mla_lane_w8_z28_z16_13:
+** mov (z[0-7]).d, z16.d
+** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8, z28, z16, 13),
+ svmla_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mla_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8, z17, z7, 14),
+ svmla_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mla_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svuint8x4_t, svuint8_t,
+ svmla_lane_za32_u8_vg4x4 (w8, z22, z4, 15),
+ svmla_lane_za32_vg4x4 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (0, z0, z4, 0),
+ svmla_lane_za64_vg1x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w0, z0, z7, 1),
+ svmla_lane_za64_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmla za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8, z28, z4, 0),
+ svmla_lane_za64_vg1x2 (w8, z28, z4, 0))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** fmla za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8 + 7, z0, z4, 1),
+ svmla_lane_za64_vg1x2 (w8 + 7, z0, z4, 1))
+
+/*
+** mla_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8 + 8, z0, z4, 0),
+ svmla_lane_za64_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** mla_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w0 - 1, z0, z4, 1),
+ svmla_lane_za64_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** mla_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8, z4, z15, 0),
+ svmla_lane_za64_vg1x2 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmla za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8, z28, z16, 1),
+ svmla_lane_za64_vg1x2 (w8, z28, z16, 1))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.d\[w8, 0, vgx2\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8, z17, z7, 0),
+ svmla_lane_za64_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** fmla za\.d\[w8, 0, vgx2\], {z22\.d - z23\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat64x2_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x2 (w8, z22, z4, 1),
+ svmla_lane_za64_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (0, z0, z4, 0),
+ svmla_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w0, z0, z7, 1),
+ svmla_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z28, z4, 0),
+ svmla_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+** fmla za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8 + 7, z0, z4, 1),
+ svmla_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** mla_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8 + 8, z0, z4, 0),
+ svmla_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** mla_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w0 - 1, z0, z4, 1),
+ svmla_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** mla_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmla za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z28, z16, 1),
+ svmla_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.d\[w8, 0, vgx4\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z17, z7, 0),
+ svmla_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.d\[w8, 0, vgx4\], [^\n]+, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z22, z4, 1),
+ svmla_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlall za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** smlall za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p12_z23_z0_1:
+** smlall za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_1, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w11 + 12, z23, z0, 1),
+ svmla_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w12, z0, z3, 6),
+ svmla_lane_za64_vg4x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (0, z0, z4, 0),
+ svmla_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w0, z0, z7, 1),
+ svmla_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8, z28, z4, 2),
+ svmla_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** smlall za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w11 + 4, z0, z4, 3),
+ svmla_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8 + 6, z0, z4, 4),
+ svmla_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8 + 7, z0, z4, 5),
+ svmla_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8 + 8, z0, z4, 6),
+ svmla_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_1, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8, z28, z16, 1),
+ svmla_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mla_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_3, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8, z17, z7, 3),
+ svmla_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mla_lane_w8_z22_z4_5:
+** smlall za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_5, svint16x2_t, svint16_t,
+ svmla_lane_za64_s16_vg4x2 (w8, z22, z4, 5),
+ svmla_lane_za64_vg4x2 (w8, z22, z4, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (0, z0, z4, 0),
+ svmla_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w0, z0, z7, 1),
+ svmla_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z28, z4, 2),
+ svmla_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** smlall za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w11 + 4, z0, z4, 3),
+ svmla_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8 + 6, z0, z4, 4),
+ svmla_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8 + 7, z0, z4, 5),
+ svmla_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8 + 8, z0, z4, 6),
+ svmla_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z28, z16, 3),
+ svmla_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_4, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z17, z7, 4),
+ svmla_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mla_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_6, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z22, z4, 6),
+ svmla_lane_za64_vg4x4 (w8, z22, z4, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlall za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** umlall za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 7, z7, z7, 0),
+ svmla_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mla_lane_w11p12_z23_z0_1:
+** umlall za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_1, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w11 + 12, z23, z0, 1),
+ svmla_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mla_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 14, z23, z0, 2),
+ svmla_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mla_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 15, z7, z7, 3),
+ svmla_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mla_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 + 16, z7, z7, 4),
+ svmla_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mla_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8 - 1, z16, z0, 5),
+ svmla_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mla_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w12, z0, z3, 6),
+ svmla_lane_za64_vg4x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (0, z0, z4, 0),
+ svmla_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w0, z0, z7, 1),
+ svmla_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8, z28, z4, 2),
+ svmla_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** umlall za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w11 + 4, z0, z4, 3),
+ svmla_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8 + 6, z0, z4, 4),
+ svmla_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8 + 7, z0, z4, 5),
+ svmla_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8 + 8, z0, z4, 6),
+ svmla_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8, z28, z16, 1),
+ svmla_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mla_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_3, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8, z17, z7, 3),
+ svmla_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mla_lane_w8_z22_z4_5:
+** umlall za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_5, svuint16x2_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x2 (w8, z22, z4, 5),
+ svmla_lane_za64_vg4x2 (w8, z22, z4, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (0, z0, z4, 0),
+ svmla_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w0, z0, z7, 1),
+ svmla_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z28, z4, 2),
+ svmla_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w11p4_z0_z4_3:
+** umlall za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w11 + 4, z0, z4, 3),
+ svmla_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mla_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8 + 6, z0, z4, 4),
+ svmla_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mla_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8 + 7, z0, z4, 5),
+ svmla_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mla_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8 + 8, z0, z4, 6),
+ svmla_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z28, z16, 3),
+ svmla_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_4, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z17, z7, 4),
+ svmla_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mla_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_6, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z22, z4, 6),
+ svmla_lane_za64_vg4x4 (w8, z22, z4, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** bfmlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8, z31, z16),
+ svmla_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 1, z0, z0),
+ svmla_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w8p2_z23_z0:
+** bfmlal za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p2_z23_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 2, z23, z0),
+ svmla_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** bfmlal za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w11 + 6, z23, z0),
+ svmla_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p7_z7_z7, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 7, z7, z7),
+ svmla_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mla_w11p10_z23_z0:
+** bfmlal za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p10_z23_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w11 + 10, z23, z0),
+ svmla_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** bfmlal za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 14, z23, z0),
+ svmla_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 15, z7, z7),
+ svmla_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 + 16, z7, z7),
+ svmla_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8 - 1, z16, z0),
+ svmla_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w12, z0, z3),
+ svmla_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (0, z0, z0),
+ svmla_za32_vg2x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w0, z0, z0),
+ svmla_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z0, z4),
+ svmla_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z4, z18),
+ svmla_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers.  */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z0, z23),
+ svmla_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z23, z0),
+ svmla_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z18, z28),
+ svmla_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8, z28, z4),
+ svmla_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 + 1, z4, z0),
+ svmla_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** bfmlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 + 2, z4, z0),
+ svmla_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** bfmlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 + 6, z4, z0),
+ svmla_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 + 7, z4, z0),
+ svmla_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 + 8, z4, z4),
+ svmla_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x2_t,
+ svmla_za32_bf16_vg2x2 (w8 - 1, z4, z0),
+ svmla_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (0, z1, z0),
+ svmla_za32_vg2x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w0, z1, z0),
+ svmla_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8, z1, z0),
+ svmla_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8 + 1, z1, z0),
+ svmla_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** bfmlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8 + 4, z20, z0),
+ svmla_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** bfmlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8 + 6, z27, z0),
+ svmla_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8 + 7, z1, z0),
+ svmla_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8 + 8, z1, z0),
+ svmla_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w0 - 1, z1, z0),
+ svmla_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8, z0, z15),
+ svmla_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x2 (w8, z20, z16),
+ svmla_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (0, z0, z0),
+ svmla_za32_vg2x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w0, z0, z0),
+ svmla_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z0, z4),
+ svmla_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z0, z18),
+ svmla_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z18, z0),
+ svmla_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z0, z23),
+ svmla_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z23, z0),
+ svmla_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z4, z28),
+ svmla_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8, z28, z0),
+ svmla_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 + 1, z4, z0),
+ svmla_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** bfmlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 + 2, z4, z0),
+ svmla_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** bfmlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 + 6, z4, z0),
+ svmla_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 + 7, z4, z0),
+ svmla_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 + 8, z4, z4),
+ svmla_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x4_t,
+ svmla_za32_bf16_vg2x4 (w8 - 1, z4, z0),
+ svmla_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (0, z1, z0),
+ svmla_za32_vg2x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w0, z1, z0),
+ svmla_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8, z1, z0),
+ svmla_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8 + 1, z1, z0),
+ svmla_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** bfmlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8 + 4, z20, z0),
+ svmla_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** bfmlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8 + 6, z27, z0),
+ svmla_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8 + 7, z1, z0),
+ svmla_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8 + 8, z1, z0),
+ svmla_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w0 - 1, z1, z0),
+ svmla_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8, z0, z15),
+ svmla_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8, z20, z16),
+ svmla_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** fmlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8, z31, z16),
+ svmla_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 1, z0, z0),
+ svmla_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w8p2_z23_z0:
+** fmlal za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p2_z23_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 2, z23, z0),
+ svmla_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** fmlal za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w11 + 6, z23, z0),
+ svmla_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p7_z7_z7, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 7, z7, z7),
+ svmla_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mla_w11p10_z23_z0:
+** fmlal za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p10_z23_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w11 + 10, z23, z0),
+ svmla_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** fmlal za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 14, z23, z0),
+ svmla_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 15, z7, z7),
+ svmla_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 + 16, z7, z7),
+ svmla_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8 - 1, z16, z0),
+ svmla_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w12, z0, z3),
+ svmla_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (0, z0, z0),
+ svmla_za32_vg2x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w0, z0, z0),
+ svmla_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z0, z4),
+ svmla_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z4, z18),
+ svmla_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z0, z23),
+ svmla_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z23, z0),
+ svmla_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z18, z28),
+ svmla_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8, z28, z4),
+ svmla_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 + 1, z4, z0),
+ svmla_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** fmlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 + 2, z4, z0),
+ svmla_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** fmlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 + 6, z4, z0),
+ svmla_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 + 7, z4, z0),
+ svmla_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 + 8, z4, z4),
+ svmla_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x2_t,
+ svmla_za32_f16_vg2x2 (w8 - 1, z4, z0),
+ svmla_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (0, z1, z0),
+ svmla_za32_vg2x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w0, z1, z0),
+ svmla_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8, z1, z0),
+ svmla_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8 + 1, z1, z0),
+ svmla_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** fmlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8 + 4, z20, z0),
+ svmla_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** fmlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8 + 6, z27, z0),
+ svmla_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8 + 7, z1, z0),
+ svmla_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8 + 8, z1, z0),
+ svmla_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w0 - 1, z1, z0),
+ svmla_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8, z0, z15),
+ svmla_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x2 (w8, z20, z16),
+ svmla_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (0, z0, z0),
+ svmla_za32_vg2x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w0, z0, z0),
+ svmla_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z0, z4),
+ svmla_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z0, z18),
+ svmla_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z18, z0),
+ svmla_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z0, z23),
+ svmla_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z23, z0),
+ svmla_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** fmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z4, z28),
+ svmla_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8, z28, z0),
+ svmla_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 + 1, z4, z0),
+ svmla_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** fmlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 + 2, z4, z0),
+ svmla_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** fmlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 + 6, z4, z0),
+ svmla_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 + 7, z4, z0),
+ svmla_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 + 8, z4, z4),
+ svmla_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x4_t,
+ svmla_za32_f16_vg2x4 (w8 - 1, z4, z0),
+ svmla_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (0, z1, z0),
+ svmla_za32_vg2x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w0, z1, z0),
+ svmla_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8, z1, z0),
+ svmla_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8 + 1, z1, z0),
+ svmla_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** fmlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8 + 4, z20, z0),
+ svmla_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** fmlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8 + 6, z27, z0),
+ svmla_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8 + 7, z1, z0),
+ svmla_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8 + 8, z1, z0),
+ svmla_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w0 - 1, z1, z0),
+ svmla_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8, z0, z15),
+ svmla_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+ svmla_single_za32_f16_vg2x4 (w8, z20, z16),
+ svmla_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (0, z0, z0),
+ svmla_za32_vg1x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w0, z0, z0),
+ svmla_za32_vg1x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmla za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8, z0, z4),
+ svmla_za32_vg1x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8, z4, z18),
+ svmla_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z23_z0:
+** ...
+** fmla za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8, z23, z0),
+ svmla_za32_vg1x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z23:
+** ...
+** fmla za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z23, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8, z18, z23),
+ svmla_za32_vg1x2 (w8, z18, z23))
+
+/*
+** mla_w8_z4_z28:
+** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8, z4, z28),
+ svmla_za32_vg1x2 (w8, z4, z28))
+
+/*
+** mla_w8p7_z4_z0:
+** fmla za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8 + 7, z4, z0),
+ svmla_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8 + 8, z4, z4),
+ svmla_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmla za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat32x2_t,
+ svmla_za32_f32_vg1x2 (w8 - 1, z4, z0),
+ svmla_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (0, z1, z0),
+ svmla_za32_vg1x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w0, z1, z0),
+ svmla_za32_vg1x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmla za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w8, z1, z0),
+ svmla_za32_vg1x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** fmla za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w8 + 7, z1, z0),
+ svmla_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w8 + 8, z1, z0),
+ svmla_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w0 - 1, z1, z0),
+ svmla_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmla za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w8, z0, z15),
+ svmla_za32_vg1x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmla za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat32x2_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x2 (w8, z20, z16),
+ svmla_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (0, z0, z0),
+ svmla_za32_vg1x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w0, z0, z0),
+ svmla_za32_vg1x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8, z0, z4),
+ svmla_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8, z0, z18),
+ svmla_za32_vg1x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z28:
+** ...
+** fmla za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8, z18, z28),
+ svmla_za32_vg1x4 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z23:
+** ...
+** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z23, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8, z28, z23),
+ svmla_za32_vg1x4 (w8, z28, z23))
+
+/*
+** mla_w8p7_z4_z0:
+** fmla za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8 + 7, z4, z0),
+ svmla_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8 + 8, z4, z4),
+ svmla_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmla za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat32x4_t,
+ svmla_za32_f32_vg1x4 (w8 - 1, z4, z0),
+ svmla_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (0, z1, z0),
+ svmla_za32_vg1x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w0, z1, z0),
+ svmla_za32_vg1x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmla za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w8, z1, z0),
+ svmla_za32_vg1x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** fmla za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w8 + 7, z1, z0),
+ svmla_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w8 + 8, z1, z0),
+ svmla_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w0 - 1, z1, z0),
+ svmla_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w8, z0, z15),
+ svmla_za32_vg1x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmla za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat32x4_t, svfloat32_t,
+ svmla_single_za32_f32_vg1x4 (w8, z20, z16),
+ svmla_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svint16_t,
+ svmla_za32_s16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svint16_t,
+ svmla_za32_s16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** smlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svint16_t,
+ svmla_za32_s16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlal za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svint16_t,
+ svmla_za32_s16_vg2x1 (w8, z31, z16),
+ svmla_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 1, z0, z0),
+ svmla_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w8p2_z23_z0:
+** smlal za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p2_z23_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 2, z23, z0),
+ svmla_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** smlal za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w11 + 6, z23, z0),
+ svmla_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p7_z7_z7, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 7, z7, z7),
+ svmla_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mla_w11p10_z23_z0:
+** smlal za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p10_z23_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w11 + 10, z23, z0),
+ svmla_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** smlal za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 14, z23, z0),
+ svmla_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 15, z7, z7),
+ svmla_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 + 16, z7, z7),
+ svmla_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svint16_t,
+ svmla_za32_s16_vg2x1 (w8 - 1, z16, z0),
+ svmla_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svint16_t,
+ svmla_za32_s16_vg2x1 (w12, z0, z3),
+ svmla_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (0, z0, z0),
+ svmla_za32_vg2x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w0, z0, z0),
+ svmla_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z0, z4),
+ svmla_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** smlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z4, z18),
+ svmla_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z0, z23),
+ svmla_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z23, z0),
+ svmla_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** smlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z18, z28),
+ svmla_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8, z28, z4),
+ svmla_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 + 1, z4, z0),
+ svmla_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** smlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 + 2, z4, z0),
+ svmla_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** smlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 + 6, z4, z0),
+ svmla_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 + 7, z4, z0),
+ svmla_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 + 8, z4, z4),
+ svmla_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint16x2_t,
+ svmla_za32_s16_vg2x2 (w8 - 1, z4, z0),
+ svmla_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (0, z1, z0),
+ svmla_za32_vg2x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w0, z1, z0),
+ svmla_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8, z1, z0),
+ svmla_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8 + 1, z1, z0),
+ svmla_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** smlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8 + 4, z20, z0),
+ svmla_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** smlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8 + 6, z27, z0),
+ svmla_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8 + 7, z1, z0),
+ svmla_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8 + 8, z1, z0),
+ svmla_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w0 - 1, z1, z0),
+ svmla_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8, z0, z15),
+ svmla_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svmla_single_za32_s16_vg2x2 (w8, z20, z16),
+ svmla_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (0, z0, z0),
+ svmla_za32_vg2x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w0, z0, z0),
+ svmla_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z0, z4),
+ svmla_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z0, z18),
+ svmla_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z18, z0),
+ svmla_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z0, z23),
+ svmla_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z23, z0),
+ svmla_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** smlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z4, z28),
+ svmla_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8, z28, z0),
+ svmla_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 + 1, z4, z0),
+ svmla_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** smlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 + 2, z4, z0),
+ svmla_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** smlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 + 6, z4, z0),
+ svmla_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 + 7, z4, z0),
+ svmla_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 + 8, z4, z4),
+ svmla_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint16x4_t,
+ svmla_za32_s16_vg2x4 (w8 - 1, z4, z0),
+ svmla_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (0, z1, z0),
+ svmla_za32_vg2x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w0, z1, z0),
+ svmla_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8, z1, z0),
+ svmla_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8 + 1, z1, z0),
+ svmla_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** smlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8 + 4, z20, z0),
+ svmla_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** smlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8 + 6, z27, z0),
+ svmla_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8 + 7, z1, z0),
+ svmla_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8 + 8, z1, z0),
+ svmla_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w0 - 1, z1, z0),
+ svmla_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8, z0, z15),
+ svmla_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svmla_single_za32_s16_vg2x4 (w8, z20, z16),
+ svmla_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (0, z0, z0),
+ svmla_za32_vg4x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svint8_t,
+ svmla_za32_s8_vg4x1 (w0, z0, z3),
+ svmla_za32_vg4x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svint8_t,
+ svmla_za32_s8_vg4x1 (w7, z0, z3),
+ svmla_za32_vg4x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** smlall za\.s\[w8, 0:3\], z7\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svint8_t,
+ svmla_za32_s8_vg4x1 (w8, z7, z3),
+ svmla_za32_vg4x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3\], z31\.b, \1\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svint8_t,
+ svmla_za32_s8_vg4x1 (w8, z31, z16),
+ svmla_za32_vg4x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w8 + 1, z0, z0),
+ svmla_za32_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w10p4_z23_z0:
+** smlall za\.s\[w10, 4:7\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w10p4_z23_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w10 + 4, z23, z0),
+ svmla_za32_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w11 + 6, z23, z0),
+ svmla_za32_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w9p8_z7_z7:
+** smlall za\.s\[w9, 8:11\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w9p8_z7_z7, svint8_t,
+ svmla_za32_s8_vg4x1 (w9 + 8, z7, z7),
+ svmla_za32_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mla_w11p12_z23_z0:
+** smlall za\.s\[w11, 12:15\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w11p12_z23_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w11 + 12, z23, z0),
+ svmla_za32_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w8 + 14, z23, z0),
+ svmla_za32_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svint8_t,
+ svmla_za32_s8_vg4x1 (w8 + 15, z7, z7),
+ svmla_za32_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svint8_t,
+ svmla_za32_s8_vg4x1 (w8 + 16, z7, z7),
+ svmla_za32_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z16\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svint8_t,
+ svmla_za32_s8_vg4x1 (w8 - 1, z16, z0),
+ svmla_za32_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svint8_t,
+ svmla_za32_s8_vg4x1 (w12, z0, z3),
+ svmla_za32_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (0, z0, z0),
+ svmla_za32_vg4x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w0, z0, z0),
+ svmla_za32_vg4x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z0, z4),
+ svmla_za32_vg4x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** smlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z4, z18),
+ svmla_za32_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z0, z23),
+ svmla_za32_vg4x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlall za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z23, z0),
+ svmla_za32_vg4x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** smlall za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z18, z28),
+ svmla_za32_vg4x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8, z28, z4),
+ svmla_za32_vg4x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8 + 1, z4, z0),
+ svmla_za32_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8 + 2, z4, z0),
+ svmla_za32_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** smlall za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w11 + 4, z4, z0),
+ svmla_za32_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8 + 7, z4, z0),
+ svmla_za32_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8 + 8, z4, z4),
+ svmla_za32_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint8x2_t,
+ svmla_za32_s8_vg4x2 (w8 - 1, z4, z0),
+ svmla_za32_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (0, z1, z0),
+ svmla_za32_vg4x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w0, z1, z0),
+ svmla_za32_vg4x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlall za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8, z1, z0),
+ svmla_za32_vg4x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8 + 1, z1, z0),
+ svmla_za32_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8 + 2, z20, z0),
+ svmla_za32_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mla_single_w11p4_z27_z0:
+** smlall za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w11 + 4, z27, z0),
+ svmla_za32_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8 + 7, z1, z0),
+ svmla_za32_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8 + 8, z1, z0),
+ svmla_za32_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w0 - 1, z1, z0),
+ svmla_za32_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8, z0, z15),
+ svmla_za32_vg4x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint8x2_t, svint8_t,
+ svmla_single_za32_s8_vg4x2 (w8, z20, z16),
+ svmla_za32_vg4x2 (w8, z20, z16))
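As a companion sketch (again not part of the patch), this is roughly how the _single x2 form tested above is reached from user code. svcreate2_s8 is the standard SVE ACLE tuple constructor; the remaining names are illustrative assumptions.

#include <arm_sme.h>
#include <stdint.h>

/* Multiply each vector of an int8 pair by one shared vector and
   accumulate the widened products into a group of four ZA.S slices,
   as in the mla_single_* tests above.  */
void
mla_pair_by_single (uint32_t slice, svint8_t a0, svint8_t a1, svint8_t b)
  __arm_streaming __arm_inout("za")
{
  svint8x2_t pair = svcreate2_s8 (a0, a1);
  svmla_single_za32_s8_vg4x2 (slice, pair, b);  /* smlall (multi x single)  */
}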
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (0, z0, z0),
+ svmla_za32_vg4x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w0, z0, z0),
+ svmla_za32_vg4x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z0, z4),
+ svmla_za32_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z0, z18),
+ svmla_za32_vg4x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z18, z0),
+ svmla_za32_vg4x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z0, z23),
+ svmla_za32_vg4x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z23, z0),
+ svmla_za32_vg4x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** smlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z4, z28),
+ svmla_za32_vg4x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8, z28, z0),
+ svmla_za32_vg4x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8 + 1, z4, z0),
+ svmla_za32_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8 + 2, z4, z0),
+ svmla_za32_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** smlall za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w11 + 4, z4, z0),
+ svmla_za32_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8 + 7, z4, z0),
+ svmla_za32_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8 + 8, z4, z4),
+ svmla_za32_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint8x4_t,
+ svmla_za32_s8_vg4x4 (w8 - 1, z4, z0),
+ svmla_za32_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (0, z1, z0),
+ svmla_za32_vg4x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w0, z1, z0),
+ svmla_za32_vg4x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlall za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8, z1, z0),
+ svmla_za32_vg4x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8 + 1, z1, z0),
+ svmla_za32_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** smlall za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8 + 4, z20, z0),
+ svmla_za32_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8 + 6, z27, z0),
+ svmla_za32_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8 + 7, z1, z0),
+ svmla_za32_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8 + 8, z1, z0),
+ svmla_za32_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w0 - 1, z1, z0),
+ svmla_za32_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8, z0, z15),
+ svmla_za32_vg4x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint8x4_t, svint8_t,
+ svmla_single_za32_s8_vg4x4 (w8, z20, z16),
+ svmla_za32_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svuint16_t,
+ svmla_za32_u16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svuint16_t,
+ svmla_za32_u16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** umlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlal za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8, z31, z16),
+ svmla_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 1, z0, z0),
+ svmla_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w8p2_z23_z0:
+** umlal za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p2_z23_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 2, z23, z0),
+ svmla_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** umlal za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w11 + 6, z23, z0),
+ svmla_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p7_z7_z7, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 7, z7, z7),
+ svmla_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mla_w11p10_z23_z0:
+** umlal za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p10_z23_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w11 + 10, z23, z0),
+ svmla_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** umlal za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 14, z23, z0),
+ svmla_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 15, z7, z7),
+ svmla_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 + 16, z7, z7),
+ svmla_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svuint16_t,
+ svmla_za32_u16_vg2x1 (w8 - 1, z16, z0),
+ svmla_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svuint16_t,
+ svmla_za32_u16_vg2x1 (w12, z0, z3),
+ svmla_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (0, z0, z0),
+ svmla_za32_vg2x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w0, z0, z0),
+ svmla_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z0, z4),
+ svmla_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** umlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z4, z18),
+ svmla_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z0, z23),
+ svmla_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z23, z0),
+ svmla_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** umlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z18, z28),
+ svmla_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8, z28, z4),
+ svmla_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 + 1, z4, z0),
+ svmla_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** umlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 + 2, z4, z0),
+ svmla_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** umlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 + 6, z4, z0),
+ svmla_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 + 7, z4, z0),
+ svmla_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 + 8, z4, z4),
+ svmla_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x2_t,
+ svmla_za32_u16_vg2x2 (w8 - 1, z4, z0),
+ svmla_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (0, z1, z0),
+ svmla_za32_vg2x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w0, z1, z0),
+ svmla_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8, z1, z0),
+ svmla_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8 + 1, z1, z0),
+ svmla_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** umlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8 + 4, z20, z0),
+ svmla_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** umlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8 + 6, z27, z0),
+ svmla_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8 + 7, z1, z0),
+ svmla_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8 + 8, z1, z0),
+ svmla_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w0 - 1, z1, z0),
+ svmla_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8, z0, z15),
+ svmla_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svmla_single_za32_u16_vg2x2 (w8, z20, z16),
+ svmla_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (0, z0, z0),
+ svmla_za32_vg2x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w0, z0, z0),
+ svmla_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z0, z4),
+ svmla_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z0, z18),
+ svmla_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z18, z0),
+ svmla_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z0, z23),
+ svmla_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z23, z0),
+ svmla_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** umlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z4, z28),
+ svmla_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8, z28, z0),
+ svmla_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 + 1, z4, z0),
+ svmla_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** umlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 + 2, z4, z0),
+ svmla_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w8p6_z4_z0:
+** umlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p6_z4_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 + 6, z4, z0),
+ svmla_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 + 7, z4, z0),
+ svmla_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 + 8, z4, z4),
+ svmla_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x4_t,
+ svmla_za32_u16_vg2x4 (w8 - 1, z4, z0),
+ svmla_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (0, z1, z0),
+ svmla_za32_vg2x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w0, z1, z0),
+ svmla_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8, z1, z0),
+ svmla_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8 + 1, z1, z0),
+ svmla_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** umlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8 + 4, z20, z0),
+ svmla_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** umlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8 + 6, z27, z0),
+ svmla_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8 + 7, z1, z0),
+ svmla_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8 + 8, z1, z0),
+ svmla_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w0 - 1, z1, z0),
+ svmla_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8, z0, z15),
+ svmla_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svmla_single_za32_u16_vg2x4 (w8, z20, z16),
+ svmla_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (0, z0, z0),
+ svmla_za32_vg4x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svuint8_t,
+ svmla_za32_u8_vg4x1 (w0, z0, z3),
+ svmla_za32_vg4x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svuint8_t,
+ svmla_za32_u8_vg4x1 (w7, z0, z3),
+ svmla_za32_vg4x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** umlall za\.s\[w8, 0:3\], z7\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8, z7, z3),
+ svmla_za32_vg4x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.s\[w8, 0:3\], z31\.b, \1\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8, z31, z16),
+ svmla_za32_vg4x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8 + 1, z0, z0),
+ svmla_za32_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w10p4_z23_z0:
+** umlall za\.s\[w10, 4:7\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w10p4_z23_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w10 + 4, z23, z0),
+ svmla_za32_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w11 + 6, z23, z0),
+ svmla_za32_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w9p8_z7_z7:
+** umlall za\.s\[w9, 8:11\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w9p8_z7_z7, svuint8_t,
+ svmla_za32_u8_vg4x1 (w9 + 8, z7, z7),
+ svmla_za32_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mla_w11p12_z23_z0:
+** umlall za\.s\[w11, 12:15\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w11p12_z23_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w11 + 12, z23, z0),
+ svmla_za32_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8 + 14, z23, z0),
+ svmla_za32_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8 + 15, z7, z7),
+ svmla_za32_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8 + 16, z7, z7),
+ svmla_za32_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z16\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svuint8_t,
+ svmla_za32_u8_vg4x1 (w8 - 1, z16, z0),
+ svmla_za32_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svuint8_t,
+ svmla_za32_u8_vg4x1 (w12, z0, z3),
+ svmla_za32_vg4x1 (w12, z0, z3))
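Each TEST_* invocation above passes both the fully-suffixed intrinsic and its type-generic overload. The sketch below (an illustration, not part of the patch) shows the two spellings side by side for the unsigned x1 form, again assuming <arm_sme.h> and the SME keyword attributes.

#include <arm_sme.h>
#include <stdint.h>

void
mla_both_spellings (uint32_t slice, svuint8_t zn, svuint8_t zm)
  __arm_streaming __arm_inout("za")
{
  svmla_za32_u8_vg4x1 (slice, zn, zm);  /* explicit element-type suffix       */
  svmla_za32_vg4x1 (slice, zn, zm);     /* overload resolved from svuint8_t   */
}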
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (0, z0, z0),
+ svmla_za32_vg4x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w0, z0, z0),
+ svmla_za32_vg4x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z0, z4),
+ svmla_za32_vg4x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** umlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z4, z18),
+ svmla_za32_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z0, z23),
+ svmla_za32_vg4x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlall za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z23, z0),
+ svmla_za32_vg4x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** umlall za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z18, z28),
+ svmla_za32_vg4x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8, z28, z4),
+ svmla_za32_vg4x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8 + 1, z4, z0),
+ svmla_za32_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8 + 2, z4, z0),
+ svmla_za32_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** umlall za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w11 + 4, z4, z0),
+ svmla_za32_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8 + 7, z4, z0),
+ svmla_za32_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8 + 8, z4, z4),
+ svmla_za32_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint8x2_t,
+ svmla_za32_u8_vg4x2 (w8 - 1, z4, z0),
+ svmla_za32_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (0, z1, z0),
+ svmla_za32_vg4x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w0, z1, z0),
+ svmla_za32_vg4x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlall za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8, z1, z0),
+ svmla_za32_vg4x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8 + 1, z1, z0),
+ svmla_za32_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8 + 2, z20, z0),
+ svmla_za32_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mla_single_w11p4_z27_z0:
+** umlall za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w11 + 4, z27, z0),
+ svmla_za32_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8 + 7, z1, z0),
+ svmla_za32_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8 + 8, z1, z0),
+ svmla_za32_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w0 - 1, z1, z0),
+ svmla_za32_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8, z0, z15),
+ svmla_za32_vg4x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint8x2_t, svuint8_t,
+ svmla_single_za32_u8_vg4x2 (w8, z20, z16),
+ svmla_za32_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (0, z0, z0),
+ svmla_za32_vg4x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w0, z0, z0),
+ svmla_za32_vg4x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z0, z4),
+ svmla_za32_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z0, z18),
+ svmla_za32_vg4x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z18, z0),
+ svmla_za32_vg4x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z0, z23),
+ svmla_za32_vg4x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z23, z0),
+ svmla_za32_vg4x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** umlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z4, z28),
+ svmla_za32_vg4x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8, z28, z0),
+ svmla_za32_vg4x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8 + 1, z4, z0),
+ svmla_za32_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8 + 2, z4, z0),
+ svmla_za32_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** umlall za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w11 + 4, z4, z0),
+ svmla_za32_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8 + 7, z4, z0),
+ svmla_za32_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8 + 8, z4, z4),
+ svmla_za32_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint8x4_t,
+ svmla_za32_u8_vg4x4 (w8 - 1, z4, z0),
+ svmla_za32_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (0, z1, z0),
+ svmla_za32_vg4x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w0, z1, z0),
+ svmla_za32_vg4x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlall za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8, z1, z0),
+ svmla_za32_vg4x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8 + 1, z1, z0),
+ svmla_za32_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** umlall za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8 + 4, z20, z0),
+ svmla_za32_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8 + 6, z27, z0),
+ svmla_za32_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8 + 7, z1, z0),
+ svmla_za32_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8 + 8, z1, z0),
+ svmla_za32_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w0 - 1, z1, z0),
+ svmla_za32_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8, z0, z15),
+ svmla_za32_vg4x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint8x4_t, svuint8_t,
+ svmla_single_za32_u8_vg4x4 (w8, z20, z16),
+ svmla_za32_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (0, z0, z0),
+ svmla_za64_vg1x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w0, z0, z0),
+ svmla_za64_vg1x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmla za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8, z0, z4),
+ svmla_za64_vg1x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8, z4, z18),
+ svmla_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z23_z0:
+** ...
+** fmla za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8, z23, z0),
+ svmla_za64_vg1x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z23:
+** ...
+** fmla za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z23, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8, z18, z23),
+ svmla_za64_vg1x2 (w8, z18, z23))
+
+/*
+** mla_w8_z4_z28:
+** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8, z4, z28),
+ svmla_za64_vg1x2 (w8, z4, z28))
+
+/*
+** mla_w8p7_z4_z0:
+** fmla za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8 + 7, z4, z0),
+ svmla_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8 + 8, z4, z4),
+ svmla_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmla za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat64x2_t,
+ svmla_za64_f64_vg1x2 (w8 - 1, z4, z0),
+ svmla_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (0, z1, z0),
+ svmla_za64_vg1x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w0, z1, z0),
+ svmla_za64_vg1x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmla za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w8, z1, z0),
+ svmla_za64_vg1x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** fmla za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w8 + 7, z1, z0),
+ svmla_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w8 + 8, z1, z0),
+ svmla_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w0 - 1, z1, z0),
+ svmla_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmla za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w8, z0, z15),
+ svmla_za64_vg1x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmla za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat64x2_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x2 (w8, z20, z16),
+ svmla_za64_vg1x2 (w8, z20, z16))
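The double-precision tests additionally require the FEAT_SME_F64F64 extension, enabled via the target pragma used above. A usage sketch under the same assumptions (SME2 already enabled on the command line, names illustrative, not part of the patch):

#pragma GCC target "+sme-f64f64"

#include <arm_sme.h>
#include <stdint.h>

/* Element-wise multiply two pairs of float64 vectors and accumulate
   into the ZA.D vector group selected by `slice' (fmla ..., vgx2).  */
void
fmla_f64_pair (uint32_t slice, svfloat64x2_t zn, svfloat64x2_t zm)
  __arm_streaming __arm_inout("za")
{
  svmla_za64_f64_vg1x2 (slice, zn, zm);
}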
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (0, z0, z0),
+ svmla_za64_vg1x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w0, z0, z0),
+ svmla_za64_vg1x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8, z0, z4),
+ svmla_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8, z0, z18),
+ svmla_za64_vg1x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z28:
+** ...
+** fmla za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8, z18, z28),
+ svmla_za64_vg1x4 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z23:
+** ...
+** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z23, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8, z28, z23),
+ svmla_za64_vg1x4 (w8, z28, z23))
+
+/*
+** mla_w8p7_z4_z0:
+** fmla za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8 + 7, z4, z0),
+ svmla_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8 + 8, z4, z4),
+ svmla_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmla za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat64x4_t,
+ svmla_za64_f64_vg1x4 (w8 - 1, z4, z0),
+ svmla_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (0, z1, z0),
+ svmla_za64_vg1x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w0, z1, z0),
+ svmla_za64_vg1x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** fmla za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w8, z1, z0),
+ svmla_za64_vg1x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** fmla za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w8 + 7, z1, z0),
+ svmla_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w8 + 8, z1, z0),
+ svmla_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w0 - 1, z1, z0),
+ svmla_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w8, z0, z15),
+ svmla_za64_vg1x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmla za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat64x4_t, svfloat64_t,
+ svmla_single_za64_f64_vg1x4 (w8, z20, z16),
+ svmla_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (0, z0, z0),
+ svmla_za64_vg4x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svint16_t,
+ svmla_za64_s16_vg4x1 (w0, z0, z3),
+ svmla_za64_vg4x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svint16_t,
+ svmla_za64_s16_vg4x1 (w7, z0, z3),
+ svmla_za64_vg4x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** smlall za\.d\[w8, 0:3\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svint16_t,
+ svmla_za64_s16_vg4x1 (w8, z7, z3),
+ svmla_za64_vg4x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.d\[w8, 0:3\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svint16_t,
+ svmla_za64_s16_vg4x1 (w8, z31, z16),
+ svmla_za64_vg4x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w8 + 1, z0, z0),
+ svmla_za64_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w10p4_z23_z0:
+** smlall za\.d\[w10, 4:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w10p4_z23_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w10 + 4, z23, z0),
+ svmla_za64_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w11 + 6, z23, z0),
+ svmla_za64_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w9p8_z7_z7:
+** smlall za\.d\[w9, 8:11\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w9p8_z7_z7, svint16_t,
+ svmla_za64_s16_vg4x1 (w9 + 8, z7, z7),
+ svmla_za64_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mla_w11p12_z23_z0:
+** smlall za\.d\[w11, 12:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p12_z23_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w11 + 12, z23, z0),
+ svmla_za64_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w8 + 14, z23, z0),
+ svmla_za64_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svint16_t,
+ svmla_za64_s16_vg4x1 (w8 + 15, z7, z7),
+ svmla_za64_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svint16_t,
+ svmla_za64_s16_vg4x1 (w8 + 16, z7, z7),
+ svmla_za64_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svint16_t,
+ svmla_za64_s16_vg4x1 (w8 - 1, z16, z0),
+ svmla_za64_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svint16_t,
+ svmla_za64_s16_vg4x1 (w12, z0, z3),
+ svmla_za64_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (0, z0, z0),
+ svmla_za64_vg4x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w0, z0, z0),
+ svmla_za64_vg4x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z0, z4),
+ svmla_za64_vg4x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** smlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z4, z18),
+ svmla_za64_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z0, z23),
+ svmla_za64_vg4x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlall za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z23, z0),
+ svmla_za64_vg4x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** smlall za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z18, z28),
+ svmla_za64_vg4x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8, z28, z4),
+ svmla_za64_vg4x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8 + 1, z4, z0),
+ svmla_za64_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8 + 2, z4, z0),
+ svmla_za64_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** smlall za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w11 + 4, z4, z0),
+ svmla_za64_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8 + 7, z4, z0),
+ svmla_za64_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8 + 8, z4, z4),
+ svmla_za64_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint16x2_t,
+ svmla_za64_s16_vg4x2 (w8 - 1, z4, z0),
+ svmla_za64_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (0, z1, z0),
+ svmla_za64_vg4x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w0, z1, z0),
+ svmla_za64_vg4x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlall za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8, z1, z0),
+ svmla_za64_vg4x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8 + 1, z1, z0),
+ svmla_za64_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8 + 2, z20, z0),
+ svmla_za64_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mla_single_w11p4_z27_z0:
+** smlall za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w11 + 4, z27, z0),
+ svmla_za64_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8 + 7, z1, z0),
+ svmla_za64_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8 + 8, z1, z0),
+ svmla_za64_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w0 - 1, z1, z0),
+ svmla_za64_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8, z0, z15),
+ svmla_za64_vg4x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlall za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svmla_single_za64_s16_vg4x2 (w8, z20, z16),
+ svmla_za64_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (0, z0, z0),
+ svmla_za64_vg4x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w0, z0, z0),
+ svmla_za64_vg4x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z0, z4),
+ svmla_za64_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z0, z18),
+ svmla_za64_vg4x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z18, z0),
+ svmla_za64_vg4x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z0, z23),
+ svmla_za64_vg4x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z23, z0),
+ svmla_za64_vg4x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** smlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z4, z28),
+ svmla_za64_vg4x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8, z28, z0),
+ svmla_za64_vg4x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8 + 1, z4, z0),
+ svmla_za64_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8 + 2, z4, z0),
+ svmla_za64_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** smlall za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w11 + 4, z4, z0),
+ svmla_za64_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8 + 7, z4, z0),
+ svmla_za64_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8 + 8, z4, z4),
+ svmla_za64_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svint16x4_t,
+ svmla_za64_s16_vg4x4 (w8 - 1, z4, z0),
+ svmla_za64_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (0, z1, z0),
+ svmla_za64_vg4x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w0, z1, z0),
+ svmla_za64_vg4x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** smlall za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8, z1, z0),
+ svmla_za64_vg4x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8 + 1, z1, z0),
+ svmla_za64_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** smlall za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8 + 4, z20, z0),
+ svmla_za64_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** smlall za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8 + 6, z27, z0),
+ svmla_za64_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8 + 7, z1, z0),
+ svmla_za64_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8 + 8, z1, z0),
+ svmla_za64_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w0 - 1, z1, z0),
+ svmla_za64_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8, z0, z15),
+ svmla_za64_vg4x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlall za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svmla_single_za64_s16_vg4x4 (w8, z20, z16),
+ svmla_za64_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (0, z0, z0),
+ svmla_za64_vg4x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svuint16_t,
+ svmla_za64_u16_vg4x1 (w0, z0, z3),
+ svmla_za64_vg4x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svuint16_t,
+ svmla_za64_u16_vg4x1 (w7, z0, z3),
+ svmla_za64_vg4x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** umlall za\.d\[w8, 0:3\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8, z7, z3),
+ svmla_za64_vg4x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.d\[w8, 0:3\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z31_z16, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8, z31, z16),
+ svmla_za64_vg4x1 (w8, z31, z16))
+
+/*
+** mla_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p1_z0_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8 + 1, z0, z0),
+ svmla_za64_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mla_w10p4_z23_z0:
+** umlall za\.d\[w10, 4:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w10p4_z23_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w10 + 4, z23, z0),
+ svmla_za64_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mla_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p6_z23_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w11 + 6, z23, z0),
+ svmla_za64_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mla_w9p8_z7_z7:
+** umlall za\.d\[w9, 8:11\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w9p8_z7_z7, svuint16_t,
+ svmla_za64_u16_vg4x1 (w9 + 8, z7, z7),
+ svmla_za64_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mla_w11p12_z23_z0:
+** umlall za\.d\[w11, 12:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w11p12_z23_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w11 + 12, z23, z0),
+ svmla_za64_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mla_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p14_z23_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8 + 14, z23, z0),
+ svmla_za64_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mla_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p15_z7_z7, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8 + 15, z7, z7),
+ svmla_za64_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mla_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8p16_z7_z7, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8 + 16, z7, z7),
+ svmla_za64_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mla_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8m1_z16_z0, svuint16_t,
+ svmla_za64_u16_vg4x1 (w8 - 1, z16, z0),
+ svmla_za64_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mla_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w12_z0_z3, svuint16_t,
+ svmla_za64_u16_vg4x1 (w12, z0, z3),
+ svmla_za64_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (0, z0, z0),
+ svmla_za64_vg4x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w0, z0, z0),
+ svmla_za64_vg4x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z0, z4),
+ svmla_za64_vg4x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+** umlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z4, z18),
+ svmla_za64_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z0, z23),
+ svmla_za64_vg4x2 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlall za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z23, z0),
+ svmla_za64_vg4x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z28:
+** umlall za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z18, z28),
+ svmla_za64_vg4x2 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z4:
+** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z4, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8, z28, z4),
+ svmla_za64_vg4x2 (w8, z28, z4))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8 + 1, z4, z0),
+ svmla_za64_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8 + 2, z4, z0),
+ svmla_za64_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** umlall za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w11 + 4, z4, z0),
+ svmla_za64_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8 + 7, z4, z0),
+ svmla_za64_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8 + 8, z4, z4),
+ svmla_za64_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x2_t,
+ svmla_za64_u16_vg4x2 (w8 - 1, z4, z0),
+ svmla_za64_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (0, z1, z0),
+ svmla_za64_vg4x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w0, z1, z0),
+ svmla_za64_vg4x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlall za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8, z1, z0),
+ svmla_za64_vg4x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8 + 1, z1, z0),
+ svmla_za64_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8 + 2, z20, z0),
+ svmla_za64_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mla_single_w11p4_z27_z0:
+** umlall za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w11 + 4, z27, z0),
+ svmla_za64_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8 + 7, z1, z0),
+ svmla_za64_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8 + 8, z1, z0),
+ svmla_za64_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w0 - 1, z1, z0),
+ svmla_za64_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8, z0, z15),
+ svmla_za64_vg4x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** umlall za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svmla_single_za64_u16_vg4x2 (w8, z20, z16),
+ svmla_za64_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (0, z0, z0),
+ svmla_za64_vg4x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w0, z0, z0),
+ svmla_za64_vg4x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z0, z4),
+ svmla_za64_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** mla_w8_z0_z18:
+** ...
+** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z0, z18),
+ svmla_za64_vg4x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z0:
+** ...
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z18_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z18, z0),
+ svmla_za64_vg4x4 (w8, z18, z0))
+
+/*
+** mla_w8_z0_z23:
+** ...
+** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mla_w8_z0_z23, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z0, z23),
+ svmla_za64_vg4x4 (w8, z0, z23))
+
+/*
+** mla_w8_z23_z0:
+** ...
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z23, z0),
+ svmla_za64_vg4x4 (w8, z23, z0))
+
+/*
+** mla_w8_z4_z28:
+** umlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z4, z28),
+ svmla_za64_vg4x4 (w8, z4, z28))
+
+/*
+** mla_w8_z28_z0:
+** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8_z28_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8, z28, z0),
+ svmla_za64_vg4x4 (w8, z28, z0))
+
+/*
+** mla_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8 + 1, z4, z0),
+ svmla_za64_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mla_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8 + 2, z4, z0),
+ svmla_za64_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mla_w11p4_z4_z0:
+** umlall za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w11p4_z4_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w11 + 4, z4, z0),
+ svmla_za64_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mla_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8 + 7, z4, z0),
+ svmla_za64_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8 + 8, z4, z4),
+ svmla_za64_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x4_t,
+ svmla_za64_u16_vg4x4 (w8 - 1, z4, z0),
+ svmla_za64_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (0, z1, z0),
+ svmla_za64_vg4x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w0, z1, z0),
+ svmla_za64_vg4x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+** umlall za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8, z1, z0),
+ svmla_za64_vg4x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8 + 1, z1, z0),
+ svmla_za64_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mla_single_w8p4_z20_z0:
+** umlall za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8 + 4, z20, z0),
+ svmla_za64_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mla_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** umlall za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8 + 6, z27, z0),
+ svmla_za64_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8 + 7, z1, z0),
+ svmla_za64_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8 + 8, z1, z0),
+ svmla_za64_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w0 - 1, z1, z0),
+ svmla_za64_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8, z0, z15),
+ svmla_za64_vg4x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** umlall za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svmla_single_za64_u16_vg4x4 (w8, z20, z16),
+ svmla_za64_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
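+/* The tests below check the two variable operands of BFMLSL: the ZA slice
+   index, where even offsets up to 14 are expected to be encoded directly
+   and other offsets to go through a separate add, and the lane argument,
+   which is exercised across its full 0-7 range for .h elements.  */
+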
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** bfmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** bfmlsl za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** bfmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p6_z23_z0_7:
+** bfmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p10_z23_z0_1:
+** bfmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** bfmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x1 (w12, z0, z3, 6),
+ svmls_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** bfmlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** bfmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x2 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** bfmlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t,
+ svmls_lane_za32_bf16_vg2x4 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** fmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** fmlsl za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** fmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p6_z23_z0_7:
+** fmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p10_z23_z0_1:
+** fmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** fmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svfloat16_t,
+ svmls_lane_za32_f16_vg2x1 (w12, z0, z3, 6),
+ svmls_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** fmlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x2 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** fmlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t,
+ svmls_lane_za32_f16_vg2x4 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
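+/* For the .s forms the ZA index is a single-vector-group offset, so the
+   tests below expect offsets in the 0-7 range to fold into the FMLS
+   instruction and anything else to be moved into one of w8-w11 first.  */
+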
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z7\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, z4\.s\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}, z4\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8 + 8, z0, z4, 0),
+ svmls_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w0 - 1, z0, z4, 1),
+ svmls_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, z15\.s\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8, z4, z15, 2),
+ svmls_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmls za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, \1\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8, z28, z16, 3),
+ svmls_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.s\[w8, 0, vgx2\], [^\n]+, z7\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8, z17, z7, 0),
+ svmls_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** fmls za\.s\[w8, 0, vgx2\], {z22\.s - z23\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat32x2_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x2 (w8, z22, z4, 1),
+ svmls_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z7\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, z4\.s\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}, z4\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8 + 8, z0, z4, 0),
+ svmls_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w0 - 1, z0, z4, 1),
+ svmls_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}, z15\.s\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8, z4, z15, 2),
+ svmls_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, \1\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8, z28, z16, 3),
+ svmls_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.s\[w8, 0, vgx4\], [^\n]+, z7\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8, z17, z7, 0),
+ svmls_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.s\[w8, 0, vgx4\], [^\n]+, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat32x4_t, svfloat32_t,
+ svmls_lane_za32_f32_vg1x4 (w8, z22, z4, 1),
+ svmls_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** smlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlsl za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** smlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p6_z23_z0_7:
+** smlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p10_z23_z0_1:
+** smlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** smlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svint16_t,
+ svmls_lane_za32_s16_vg2x1 (w12, z0, z3, 6),
+ svmls_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** smlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** smlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** smlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svint16x2_t, svint16_t,
+ svmls_lane_za32_s16_vg2x2 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** smlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** smlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svint16x4_t, svint16_t,
+ svmls_lane_za32_s16_vg2x4 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** smlsll za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** smlsll za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmls_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_8, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmls_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mls_lane_w11p12_z23_z0_9:
+** smlsll za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_9, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmls_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mls_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_10, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmls_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mls_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_11, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmls_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mls_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_12, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmls_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mls_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_13, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmls_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mls_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_15, svint8_t,
+ svmls_lane_za32_s8_vg4x1 (w12, z0, z3, 15),
+ svmls_lane_za32_vg4x1 (w12, z0, z3, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** smlsll za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w11 + 4, z0, z4, 3),
+ svmls_lane_za32_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8 + 6, z0, z4, 4),
+ svmls_lane_za32_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8 + 7, z0, z4, 5),
+ svmls_lane_za32_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_7:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_7, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8 + 8, z0, z4, 7),
+ svmls_lane_za32_vg4x2 (w8 + 8, z0, z4, 7))
+
+/*
+** mls_lane_w0m1_z0_z4_9:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_9, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w0 - 1, z0, z4, 9),
+ svmls_lane_za32_vg4x2 (w0 - 1, z0, z4, 9))
+
+/*
+** mls_lane_w8_z4_z15_10:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_10, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8, z4, z15, 10),
+ svmls_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mls_lane_w8_z28_z16_11:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_11, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8, z28, z16, 11),
+ svmls_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mls_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_13, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8, z17, z7, 13),
+ svmls_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svint8x2_t, svint8_t,
+ svmls_lane_za32_s8_vg4x2 (w8, z22, z4, 15),
+ svmls_lane_za32_vg4x2 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_7:
+** smlsll za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_7, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w11 + 4, z0, z4, 7),
+ svmls_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mls_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_8, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8 + 6, z0, z4, 8),
+ svmls_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mls_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_9, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8 + 7, z0, z4, 9),
+ svmls_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mls_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_10, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8 + 8, z0, z4, 10),
+ svmls_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mls_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_11, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w0 - 1, z0, z4, 11),
+ svmls_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mls_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_12, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8, z4, z15, 12),
+ svmls_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mls_lane_w8_z28_z16_13:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_13, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8, z28, z16, 13),
+ svmls_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mls_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_14, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8, z17, z7, 14),
+ svmls_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svint8x4_t, svint8_t,
+ svmls_lane_za32_s8_vg4x4 (w8, z22, z4, 15),
+ svmls_lane_za32_vg4x4 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsl za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** umlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p6_z23_z0_7:
+** umlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w11 + 6, z23, z0, 7),
+ svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p10_z23_z0_1:
+** umlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w11 + 10, z23, z0, 1),
+ svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** umlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svuint16_t,
+ svmls_lane_za32_u16_vg2x1 (w12, z0, z3, 6),
+ svmls_lane_za32_vg2x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** umlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x2 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** umlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8 + 6, z0, z4, 7),
+ svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8 + 7, z0, z4, 3),
+ svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8 + 8, z0, z4, 4),
+ svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w0 - 1, z0, z4, 5),
+ svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8, z4, z15, 6),
+ svmls_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8, z28, z16, 7),
+ svmls_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8, z17, z7, 0),
+ svmls_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+ svmls_lane_za32_u16_vg2x4 (w8, z22, z4, 1),
+ svmls_lane_za32_vg2x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (0, z0, z0, 0),
+ svmls_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w0, z0, z3, 1),
+ svmls_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w7, z0, z3, 2),
+ svmls_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsll za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8, z7, z3, 3),
+ svmls_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8, z31, z16, 4),
+ svmls_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** umlsll za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmls_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_8, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmls_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mls_lane_w11p12_z23_z0_9:
+** umlsll za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_9, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmls_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mls_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_10, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmls_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mls_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_11, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmls_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mls_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_12, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmls_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mls_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_13, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmls_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mls_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_15, svuint8_t,
+ svmls_lane_za32_u8_vg4x1 (w12, z0, z3, 15),
+ svmls_lane_za32_vg4x1 (w12, z0, z3, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (0, z0, z4, 0),
+ svmls_lane_za32_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w0, z0, z7, 1),
+ svmls_lane_za32_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8, z28, z4, 2),
+ svmls_lane_za32_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w11 + 4, z0, z4, 3),
+ svmls_lane_za32_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8 + 6, z0, z4, 4),
+ svmls_lane_za32_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8 + 7, z0, z4, 5),
+ svmls_lane_za32_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_7:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_7, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8 + 8, z0, z4, 7),
+ svmls_lane_za32_vg4x2 (w8 + 8, z0, z4, 7))
+
+/*
+** mls_lane_w0m1_z0_z4_9:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_9, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w0 - 1, z0, z4, 9),
+ svmls_lane_za32_vg4x2 (w0 - 1, z0, z4, 9))
+
+/*
+** mls_lane_w8_z4_z15_10:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_10, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8, z4, z15, 10),
+ svmls_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mls_lane_w8_z28_z16_11:
+** mov (z[0-7]).d, z16.d
+** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_11, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8, z28, z16, 11),
+ svmls_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mls_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_13, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8, z17, z7, 13),
+ svmls_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svuint8x2_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x2 (w8, z22, z4, 15),
+ svmls_lane_za32_vg4x2 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (0, z0, z4, 0),
+ svmls_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w0, z0, z7, 1),
+ svmls_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8, z28, z4, 2),
+ svmls_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_7:
+** umlsll za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_7, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w11 + 4, z0, z4, 7),
+ svmls_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mls_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_8, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8 + 6, z0, z4, 8),
+ svmls_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mls_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_9, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8 + 7, z0, z4, 9),
+ svmls_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mls_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_10, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8 + 8, z0, z4, 10),
+ svmls_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mls_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_11, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w0 - 1, z0, z4, 11),
+ svmls_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mls_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_12, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8, z4, z15, 12),
+ svmls_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mls_lane_w8_z28_z16_13:
+** mov (z[0-7]).d, z16.d
+** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_13, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8, z28, z16, 13),
+ svmls_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mls_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_14, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8, z17, z7, 14),
+ svmls_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svuint8x4_t, svuint8_t,
+ svmls_lane_za32_u8_vg4x4 (w8, z22, z4, 15),
+ svmls_lane_za32_vg4x4 (w8, z22, z4, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (0, z0, z4, 0),
+ svmls_lane_za64_vg1x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w0, z0, z7, 1),
+ svmls_lane_za64_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8, z28, z4, 0),
+ svmls_lane_za64_vg1x2 (w8, z28, z4, 0))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8 + 7, z0, z4, 1),
+ svmls_lane_za64_vg1x2 (w8 + 7, z0, z4, 1))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8 + 8, z0, z4, 0),
+ svmls_lane_za64_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w0 - 1, z0, z4, 1),
+ svmls_lane_za64_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8, z4, z15, 0),
+ svmls_lane_za64_vg1x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmls za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8, z28, z16, 1),
+ svmls_lane_za64_vg1x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx2\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8, z17, z7, 0),
+ svmls_lane_za64_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** fmls za\.d\[w8, 0, vgx2\], {z22\.d - z23\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat64x2_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x2 (w8, z22, z4, 1),
+ svmls_lane_za64_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (0, z0, z4, 0),
+ svmls_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w0, z0, z7, 1),
+ svmls_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8, z28, z4, 0),
+ svmls_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8 + 7, z0, z4, 1),
+ svmls_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8 + 8, z0, z4, 0),
+ svmls_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w0 - 1, z0, z4, 1),
+ svmls_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8, z4, z15, 0),
+ svmls_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8, z28, z16, 1),
+ svmls_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx4\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8, z17, z7, 0),
+ svmls_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx4\], [^\n]+, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat64x4_t, svfloat64_t,
+ svmls_lane_za64_f64_vg1x4 (w8, z22, z4, 1),
+ svmls_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (0, z0, z0, 0),
+ svmls_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w0, z0, z3, 1),
+ svmls_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w7, z0, z3, 2),
+ svmls_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** smlsll za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8, z7, z3, 3),
+ svmls_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8, z31, z16, 4),
+ svmls_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** smlsll za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w11 + 4, z23, z0, 7),
+ svmls_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p12_z23_z0_1:
+** smlsll za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_1, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w11 + 12, z23, z0, 1),
+ svmls_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svint16_t,
+ svmls_lane_za64_s16_vg4x1 (w12, z0, z3, 6),
+ svmls_lane_za64_vg4x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (0, z0, z4, 0),
+ svmls_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w0, z0, z7, 1),
+ svmls_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8, z28, z4, 2),
+ svmls_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** smlsll za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w11 + 4, z0, z4, 3),
+ svmls_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8 + 6, z0, z4, 4),
+ svmls_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8 + 7, z0, z4, 5),
+ svmls_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8 + 8, z0, z4, 6),
+ svmls_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w0 - 1, z0, z4, 7),
+ svmls_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8, z4, z15, 0),
+ svmls_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_1, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8, z28, z16, 1),
+ svmls_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_3, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8, z17, z7, 3),
+ svmls_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mls_lane_w8_z22_z4_5:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_5, svint16x2_t, svint16_t,
+ svmls_lane_za64_s16_vg4x2 (w8, z22, z4, 5),
+ svmls_lane_za64_vg4x2 (w8, z22, z4, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
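+/* As for the vg4x2 lane tests above, but for svint16x4_t tuples; the
+   expected instruction is SMLSLL with a vgx4 group of ZA.D slices. */
+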
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (0, z0, z4, 0),
+ svmls_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w0, z0, z7, 1),
+ svmls_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8, z28, z4, 2),
+ svmls_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** smlsll za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w11 + 4, z0, z4, 3),
+ svmls_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8 + 6, z0, z4, 4),
+ svmls_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8 + 7, z0, z4, 5),
+ svmls_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8 + 8, z0, z4, 6),
+ svmls_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmls_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8, z4, z15, 0),
+ svmls_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8, z28, z16, 3),
+ svmls_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_4, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8, z17, z7, 4),
+ svmls_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mls_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_6, svint16x4_t, svint16_t,
+ svmls_lane_za64_s16_vg4x4 (w8, z22, z4, 6),
+ svmls_lane_za64_vg4x4 (w8, z22, z4, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
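+/* Tests for the svmls_lane_za64 intrinsics on single svuint16_t vectors;
+   each call is expected to map to a UMLSLL instruction on a four-slice
+   range of ZA.D. */
+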
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (0, z0, z0, 0),
+ svmls_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w0, z0, z3, 1),
+ svmls_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w7, z0, z3, 2),
+ svmls_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsll za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8, z7, z3, 3),
+ svmls_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8, z31, z16, 4),
+ svmls_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 1, z0, z0, 5),
+ svmls_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 2, z23, z0, 6),
+ svmls_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** umlsll za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w11 + 4, z23, z0, 7),
+ svmls_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 7, z7, z7, 0),
+ svmls_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p12_z23_z0_1:
+** umlsll za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_1, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w11 + 12, z23, z0, 1),
+ svmls_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 14, z23, z0, 2),
+ svmls_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 15, z7, z7, 3),
+ svmls_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 + 16, z7, z7, 4),
+ svmls_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w8 - 1, z16, z0, 5),
+ svmls_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svuint16_t,
+ svmls_lane_za64_u16_vg4x1 (w12, z0, z3, 6),
+ svmls_lane_za64_vg4x1 (w12, z0, z3, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
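+/* Unsigned counterpart of the vg4x2 lane tests: svuint16x2_t inputs, with
+   UMLSLL expected instead of SMLSLL. */
+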
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (0, z0, z4, 0),
+ svmls_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w0, z0, z7, 1),
+ svmls_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8, z28, z4, 2),
+ svmls_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w11 + 4, z0, z4, 3),
+ svmls_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8 + 6, z0, z4, 4),
+ svmls_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8 + 7, z0, z4, 5),
+ svmls_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8 + 8, z0, z4, 6),
+ svmls_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w0 - 1, z0, z4, 7),
+ svmls_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8, z4, z15, 0),
+ svmls_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8, z28, z16, 1),
+ svmls_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_3, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8, z17, z7, 3),
+ svmls_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mls_lane_w8_z22_z4_5:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_5, svuint16x2_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x2 (w8, z22, z4, 5),
+ svmls_lane_za64_vg4x2 (w8, z22, z4, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
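+/* Unsigned counterpart of the vg4x4 lane tests: svuint16x4_t inputs, with
+   UMLSLL expected instead of SMLSLL. */
+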
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (0, z0, z4, 0),
+ svmls_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w0, z0, z7, 1),
+ svmls_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z28, z4, 2),
+ svmls_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w11 + 4, z0, z4, 3),
+ svmls_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8 + 6, z0, z4, 4),
+ svmls_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8 + 7, z0, z4, 5),
+ svmls_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8 + 8, z0, z4, 6),
+ svmls_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmls_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z4, z15, 0),
+ svmls_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z28, z16, 3),
+ svmls_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_4, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z17, z7, 4),
+ svmls_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mls_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_6, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z22, z4, 6),
+ svmls_lane_za64_vg4x4 (w8, z22, z4, 6))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
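+/* Tests for the svmls_za32 intrinsics on single svbfloat16_t vectors; each
+   call is expected to map to a BFMLSL instruction on a two-slice range of
+   ZA.S. */
+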
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** bfmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** bfmlsl za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8, z31, z16),
+ svmls_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 1, z0, z0),
+ svmls_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w8p2_z23_z0:
+** bfmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p2_z23_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 2, z23, z0),
+ svmls_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** bfmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w11 + 6, z23, z0),
+ svmls_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p7_z7_z7, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 7, z7, z7),
+ svmls_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mls_w11p10_z23_z0:
+** bfmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p10_z23_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w11 + 10, z23, z0),
+ svmls_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** bfmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 14, z23, z0),
+ svmls_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 15, z7, z7),
+ svmls_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 + 16, z7, z7),
+ svmls_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8 - 1, z16, z0),
+ svmls_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w12, z0, z3),
+ svmls_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
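+/* Tests for the svmls_za32 and svmls_single_za32 intrinsics on
+   svbfloat16x2_t tuples; the expected instruction is BFMLSL with a vgx2
+   group of ZA.S slices. */
+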
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (0, z0, z0),
+ svmls_za32_vg2x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w0, z0, z0),
+ svmls_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z0, z4),
+ svmls_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z4, z18),
+ svmls_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z0, z23),
+ svmls_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z23, z0),
+ svmls_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z18, z28),
+ svmls_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8, z28, z4),
+ svmls_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 + 1, z4, z0),
+ svmls_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** bfmlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 + 2, z4, z0),
+ svmls_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** bfmlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 + 6, z4, z0),
+ svmls_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 + 7, z4, z0),
+ svmls_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 + 8, z4, z4),
+ svmls_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x2_t,
+ svmls_za32_bf16_vg2x2 (w8 - 1, z4, z0),
+ svmls_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (0, z1, z0),
+ svmls_za32_vg2x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w0, z1, z0),
+ svmls_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8, z1, z0),
+ svmls_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8 + 1, z1, z0),
+ svmls_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** bfmlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8 + 4, z20, z0),
+ svmls_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** bfmlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8 + 6, z27, z0),
+ svmls_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8 + 7, z1, z0),
+ svmls_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8 + 8, z1, z0),
+ svmls_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w0 - 1, z1, z0),
+ svmls_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8, z0, z15),
+ svmls_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x2 (w8, z20, z16),
+ svmls_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
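+/* As above, but for svbfloat16x4_t tuples; the expected instruction is
+   BFMLSL with a vgx4 group of ZA.S slices. */
+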
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (0, z0, z0),
+ svmls_za32_vg2x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w0, z0, z0),
+ svmls_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z0, z4),
+ svmls_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z0, z18),
+ svmls_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z18, z0),
+ svmls_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z0, z23),
+ svmls_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z23, z0),
+ svmls_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z4, z28),
+ svmls_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8, z28, z0),
+ svmls_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 + 1, z4, z0),
+ svmls_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** bfmlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 + 2, z4, z0),
+ svmls_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** bfmlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 + 6, z4, z0),
+ svmls_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 + 7, z4, z0),
+ svmls_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 + 8, z4, z4),
+ svmls_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x4_t,
+ svmls_za32_bf16_vg2x4 (w8 - 1, z4, z0),
+ svmls_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (0, z1, z0),
+ svmls_za32_vg2x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w0, z1, z0),
+ svmls_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8, z1, z0),
+ svmls_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8 + 1, z1, z0),
+ svmls_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** bfmlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8 + 4, z20, z0),
+ svmls_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** bfmlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8 + 6, z27, z0),
+ svmls_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8 + 7, z1, z0),
+ svmls_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8 + 8, z1, z0),
+ svmls_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w0 - 1, z1, z0),
+ svmls_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8, z0, z15),
+ svmls_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8, z20, z16),
+ svmls_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
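+/* Tests for the svmls_za32 intrinsics on single svfloat16_t vectors; each
+   call is expected to map to an FMLSL instruction on a two-slice range of
+   ZA.S. */
+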
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** fmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmlsl za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8, z31, z16),
+ svmls_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 1, z0, z0),
+ svmls_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w8p2_z23_z0:
+** fmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p2_z23_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 2, z23, z0),
+ svmls_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** fmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w11 + 6, z23, z0),
+ svmls_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p7_z7_z7, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 7, z7, z7),
+ svmls_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mls_w11p10_z23_z0:
+** fmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p10_z23_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w11 + 10, z23, z0),
+ svmls_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** fmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 14, z23, z0),
+ svmls_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 15, z7, z7),
+ svmls_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 + 16, z7, z7),
+ svmls_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8 - 1, z16, z0),
+ svmls_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w12, z0, z3),
+ svmls_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
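+/* Tests for the svmls_za32 and svmls_single_za32 intrinsics on
+   svfloat16x2_t tuples; the expected instruction is FMLSL with a vgx2
+   group of ZA.S slices. */
+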
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (0, z0, z0),
+ svmls_za32_vg2x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w0, z0, z0),
+ svmls_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z0, z4),
+ svmls_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z4, z18),
+ svmls_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z0, z23),
+ svmls_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z23, z0),
+ svmls_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z18, z28),
+ svmls_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8, z28, z4),
+ svmls_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 + 1, z4, z0),
+ svmls_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** fmlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 + 2, z4, z0),
+ svmls_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** fmlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 + 6, z4, z0),
+ svmls_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 + 7, z4, z0),
+ svmls_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 + 8, z4, z4),
+ svmls_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x2_t,
+ svmls_za32_f16_vg2x2 (w8 - 1, z4, z0),
+ svmls_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (0, z1, z0),
+ svmls_za32_vg2x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w0, z1, z0),
+ svmls_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8, z1, z0),
+ svmls_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8 + 1, z1, z0),
+ svmls_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** fmlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8 + 4, z20, z0),
+ svmls_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** fmlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8 + 6, z27, z0),
+ svmls_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8 + 7, z1, z0),
+ svmls_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8 + 8, z1, z0),
+ svmls_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w0 - 1, z1, z0),
+ svmls_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8, z0, z15),
+ svmls_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x2 (w8, z20, z16),
+ svmls_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
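+/* As above, but for svfloat16x4_t tuples; the expected instruction is
+   FMLSL with a vgx4 group of ZA.S slices. */
+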
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (0, z0, z0),
+ svmls_za32_vg2x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w0, z0, z0),
+ svmls_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z0, z4),
+ svmls_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z0, z18),
+ svmls_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z18, z0),
+ svmls_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z0, z23),
+ svmls_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z23, z0),
+ svmls_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z4, z28),
+ svmls_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8, z28, z0),
+ svmls_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 + 1, z4, z0),
+ svmls_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** fmlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 + 2, z4, z0),
+ svmls_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** fmlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 + 6, z4, z0),
+ svmls_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 + 7, z4, z0),
+ svmls_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 + 8, z4, z4),
+ svmls_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x4_t,
+ svmls_za32_f16_vg2x4 (w8 - 1, z4, z0),
+ svmls_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (0, z1, z0),
+ svmls_za32_vg2x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w0, z1, z0),
+ svmls_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8, z1, z0),
+ svmls_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8 + 1, z1, z0),
+ svmls_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** fmlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8 + 4, z20, z0),
+ svmls_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** fmlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8 + 6, z27, z0),
+ svmls_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8 + 7, z1, z0),
+ svmls_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8 + 8, z1, z0),
+ svmls_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w0 - 1, z1, z0),
+ svmls_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8, z0, z15),
+ svmls_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+ svmls_single_za32_f16_vg2x4 (w8, z20, z16),
+ svmls_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
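+/* Tests for the svmls_za32 and svmls_single_za32 intrinsics on
+   svfloat32x2_t tuples; the expected instruction is FMLS with a vgx2
+   group of single ZA.S slices. */
+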
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (0, z0, z0),
+ svmls_za32_vg1x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w0, z0, z0),
+ svmls_za32_vg1x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmls za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8, z0, z4),
+ svmls_za32_vg1x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8, z4, z18),
+ svmls_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z23_z0:
+** ...
+** fmls za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8, z23, z0),
+ svmls_za32_vg1x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z23:
+** ...
+** fmls za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z23, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8, z18, z23),
+ svmls_za32_vg1x2 (w8, z18, z23))
+
+/*
+** mls_w8_z4_z28:
+** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8, z4, z28),
+ svmls_za32_vg1x2 (w8, z4, z28))
+
+/*
+** mls_w8p7_z4_z0:
+** fmls za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8 + 7, z4, z0),
+ svmls_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8 + 8, z4, z4),
+ svmls_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmls za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat32x2_t,
+ svmls_za32_f32_vg1x2 (w8 - 1, z4, z0),
+ svmls_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (0, z1, z0),
+ svmls_za32_vg1x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w0, z1, z0),
+ svmls_za32_vg1x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmls za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w8, z1, z0),
+ svmls_za32_vg1x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** fmls za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w8 + 7, z1, z0),
+ svmls_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w8 + 8, z1, z0),
+ svmls_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w0 - 1, z1, z0),
+ svmls_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmls za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w8, z0, z15),
+ svmls_za32_vg1x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmls za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat32x2_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x2 (w8, z20, z16),
+ svmls_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
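+/* As above, but for svfloat32x4_t tuples; the expected instruction is
+   FMLS with a vgx4 group of single ZA.S slices. */
+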
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (0, z0, z0),
+ svmls_za32_vg1x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w0, z0, z0),
+ svmls_za32_vg1x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8, z0, z4),
+ svmls_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8, z0, z18),
+ svmls_za32_vg1x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z28:
+** ...
+** fmls za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8, z18, z28),
+ svmls_za32_vg1x4 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z23:
+** ...
+** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z23, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8, z28, z23),
+ svmls_za32_vg1x4 (w8, z28, z23))
+
+/*
+** mls_w8p7_z4_z0:
+** fmls za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8 + 7, z4, z0),
+ svmls_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8 + 8, z4, z4),
+ svmls_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmls za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat32x4_t,
+ svmls_za32_f32_vg1x4 (w8 - 1, z4, z0),
+ svmls_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (0, z1, z0),
+ svmls_za32_vg1x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w0, z1, z0),
+ svmls_za32_vg1x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmls za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8, z1, z0),
+ svmls_za32_vg1x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** fmls za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8 + 7, z1, z0),
+ svmls_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8 + 8, z1, z0),
+ svmls_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w0 - 1, z1, z0),
+ svmls_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8, z0, z15),
+ svmls_za32_vg1x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmls za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8, z20, z16),
+ svmls_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** smlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsl za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svint16_t,
+ svmls_za32_s16_vg2x1 (w8, z31, z16),
+ svmls_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 1, z0, z0),
+ svmls_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w8p2_z23_z0:
+** smlsl za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p2_z23_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 2, z23, z0),
+ svmls_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** smlsl za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w11 + 6, z23, z0),
+ svmls_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p7_z7_z7, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 7, z7, z7),
+ svmls_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mls_w11p10_z23_z0:
+** smlsl za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p10_z23_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w11 + 10, z23, z0),
+ svmls_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** smlsl za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 14, z23, z0),
+ svmls_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 15, z7, z7),
+ svmls_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 + 16, z7, z7),
+ svmls_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (w8 - 1, z16, z0),
+ svmls_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w12, z0, z3),
+ svmls_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (0, z0, z0),
+ svmls_za32_vg2x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w0, z0, z0),
+ svmls_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z0, z4),
+ svmls_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z4, z18),
+ svmls_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z0, z23),
+ svmls_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z23, z0),
+ svmls_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z18, z28),
+ svmls_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8, z28, z4),
+ svmls_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 + 1, z4, z0),
+ svmls_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** smlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 + 2, z4, z0),
+ svmls_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** smlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 + 6, z4, z0),
+ svmls_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 + 7, z4, z0),
+ svmls_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 + 8, z4, z4),
+ svmls_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint16x2_t,
+ svmls_za32_s16_vg2x2 (w8 - 1, z4, z0),
+ svmls_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (0, z1, z0),
+ svmls_za32_vg2x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w0, z1, z0),
+ svmls_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8, z1, z0),
+ svmls_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8 + 1, z1, z0),
+ svmls_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** smlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8 + 4, z20, z0),
+ svmls_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** smlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8 + 6, z27, z0),
+ svmls_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8 + 7, z1, z0),
+ svmls_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8 + 8, z1, z0),
+ svmls_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w0 - 1, z1, z0),
+ svmls_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8, z0, z15),
+ svmls_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svmls_single_za32_s16_vg2x2 (w8, z20, z16),
+ svmls_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (0, z0, z0),
+ svmls_za32_vg2x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w0, z0, z0),
+ svmls_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z0, z4),
+ svmls_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z0, z18),
+ svmls_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z18, z0),
+ svmls_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z0, z23),
+ svmls_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z23, z0),
+ svmls_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** smlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z4, z28),
+ svmls_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8, z28, z0),
+ svmls_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 + 1, z4, z0),
+ svmls_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** smlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 + 2, z4, z0),
+ svmls_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** smlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 + 6, z4, z0),
+ svmls_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 + 7, z4, z0),
+ svmls_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 + 8, z4, z4),
+ svmls_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint16x4_t,
+ svmls_za32_s16_vg2x4 (w8 - 1, z4, z0),
+ svmls_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (0, z1, z0),
+ svmls_za32_vg2x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w0, z1, z0),
+ svmls_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8, z1, z0),
+ svmls_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8 + 1, z1, z0),
+ svmls_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** smlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8 + 4, z20, z0),
+ svmls_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** smlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8 + 6, z27, z0),
+ svmls_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8 + 7, z1, z0),
+ svmls_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8 + 8, z1, z0),
+ svmls_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w0 - 1, z1, z0),
+ svmls_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8, z0, z15),
+ svmls_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svmls_single_za32_s16_vg2x4 (w8, z20, z16),
+ svmls_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (0, z0, z0),
+ svmls_za32_vg4x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svint8_t,
+ svmls_za32_s8_vg4x1 (w0, z0, z3),
+ svmls_za32_vg4x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svint8_t,
+ svmls_za32_s8_vg4x1 (w7, z0, z3),
+ svmls_za32_vg4x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** smlsll za\.s\[w8, 0:3\], z7\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svint8_t,
+ svmls_za32_s8_vg4x1 (w8, z7, z3),
+ svmls_za32_vg4x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.s\[w8, 0:3\], z31\.b, \1\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svint8_t,
+ svmls_za32_s8_vg4x1 (w8, z31, z16),
+ svmls_za32_vg4x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w8 + 1, z0, z0),
+ svmls_za32_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w10p4_z23_z0:
+** smlsll za\.s\[w10, 4:7\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w10p4_z23_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w10 + 4, z23, z0),
+ svmls_za32_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w11 + 6, z23, z0),
+ svmls_za32_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w9p8_z7_z7:
+** smlsll za\.s\[w9, 8:11\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w9p8_z7_z7, svint8_t,
+ svmls_za32_s8_vg4x1 (w9 + 8, z7, z7),
+ svmls_za32_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mls_w11p12_z23_z0:
+** smlsll za\.s\[w11, 12:15\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w11p12_z23_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w11 + 12, z23, z0),
+ svmls_za32_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w8 + 14, z23, z0),
+ svmls_za32_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svint8_t,
+ svmls_za32_s8_vg4x1 (w8 + 15, z7, z7),
+ svmls_za32_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svint8_t,
+ svmls_za32_s8_vg4x1 (w8 + 16, z7, z7),
+ svmls_za32_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3\], z16\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svint8_t,
+ svmls_za32_s8_vg4x1 (w8 - 1, z16, z0),
+ svmls_za32_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svint8_t,
+ svmls_za32_s8_vg4x1 (w12, z0, z3),
+ svmls_za32_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (0, z0, z0),
+ svmls_za32_vg4x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w0, z0, z0),
+ svmls_za32_vg4x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z0, z4),
+ svmls_za32_vg4x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z4, z18),
+ svmls_za32_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z0, z23),
+ svmls_za32_vg4x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z23, z0),
+ svmls_za32_vg4x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z18, z28),
+ svmls_za32_vg4x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8, z28, z4),
+ svmls_za32_vg4x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8 + 1, z4, z0),
+ svmls_za32_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8 + 2, z4, z0),
+ svmls_za32_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** smlsll za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w11 + 4, z4, z0),
+ svmls_za32_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8 + 7, z4, z0),
+ svmls_za32_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8 + 8, z4, z4),
+ svmls_za32_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint8x2_t,
+ svmls_za32_s8_vg4x2 (w8 - 1, z4, z0),
+ svmls_za32_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (0, z1, z0),
+ svmls_za32_vg4x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w0, z1, z0),
+ svmls_za32_vg4x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8, z1, z0),
+ svmls_za32_vg4x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8 + 1, z1, z0),
+ svmls_za32_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8 + 2, z20, z0),
+ svmls_za32_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mls_single_w11p4_z27_z0:
+** smlsll za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w11 + 4, z27, z0),
+ svmls_za32_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8 + 7, z1, z0),
+ svmls_za32_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8 + 8, z1, z0),
+ svmls_za32_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w0 - 1, z1, z0),
+ svmls_za32_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8, z0, z15),
+ svmls_za32_vg4x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint8x2_t, svint8_t,
+ svmls_single_za32_s8_vg4x2 (w8, z20, z16),
+ svmls_za32_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (0, z0, z0),
+ svmls_za32_vg4x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w0, z0, z0),
+ svmls_za32_vg4x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z0, z4),
+ svmls_za32_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z0, z18),
+ svmls_za32_vg4x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z18, z0),
+ svmls_za32_vg4x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z0, z23),
+ svmls_za32_vg4x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z23, z0),
+ svmls_za32_vg4x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z4, z28),
+ svmls_za32_vg4x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8, z28, z0),
+ svmls_za32_vg4x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8 + 1, z4, z0),
+ svmls_za32_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8 + 2, z4, z0),
+ svmls_za32_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** smlsll za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w11 + 4, z4, z0),
+ svmls_za32_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8 + 7, z4, z0),
+ svmls_za32_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8 + 8, z4, z4),
+ svmls_za32_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint8x4_t,
+ svmls_za32_s8_vg4x4 (w8 - 1, z4, z0),
+ svmls_za32_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (0, z1, z0),
+ svmls_za32_vg4x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w0, z1, z0),
+ svmls_za32_vg4x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8, z1, z0),
+ svmls_za32_vg4x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8 + 1, z1, z0),
+ svmls_za32_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** smlsll za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8 + 4, z20, z0),
+ svmls_za32_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8 + 6, z27, z0),
+ svmls_za32_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8 + 7, z1, z0),
+ svmls_za32_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8 + 8, z1, z0),
+ svmls_za32_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w0 - 1, z1, z0),
+ svmls_za32_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8, z0, z15),
+ svmls_za32_vg4x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** smlsll za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint8x4_t, svint8_t,
+ svmls_single_za32_s8_vg4x4 (w8, z20, z16),
+ svmls_za32_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svuint16_t,
+ svmls_za32_u16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svuint16_t,
+ svmls_za32_u16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** umlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsl za\.s\[w8, 0:1\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8, z31, z16),
+ svmls_za32_vg2x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 1, z0, z0),
+ svmls_za32_vg2x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w8p2_z23_z0:
+** umlsl za\.s\[w8, 2:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p2_z23_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 2, z23, z0),
+ svmls_za32_vg2x1 (w8 + 2, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** umlsl za\.s\[w11, 6:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w11 + 6, z23, z0),
+ svmls_za32_vg2x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w8p7_z7_z7:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p7_z7_z7, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 7, z7, z7),
+ svmls_za32_vg2x1 (w8 + 7, z7, z7))
+
+/*
+** mls_w11p10_z23_z0:
+** umlsl za\.s\[w11, 10:11\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p10_z23_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w11 + 10, z23, z0),
+ svmls_za32_vg2x1 (w11 + 10, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** umlsl za\.s\[w8, 14:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 14, z23, z0),
+ svmls_za32_vg2x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 15, z7, z7),
+ svmls_za32_vg2x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 + 16, z7, z7),
+ svmls_za32_vg2x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svuint16_t,
+ svmls_za32_u16_vg2x1 (w8 - 1, z16, z0),
+ svmls_za32_vg2x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svuint16_t,
+ svmls_za32_u16_vg2x1 (w12, z0, z3),
+ svmls_za32_vg2x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (0, z0, z0),
+ svmls_za32_vg2x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w0, z0, z0),
+ svmls_za32_vg2x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z0, z4),
+ svmls_za32_vg2x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z4, z18),
+ svmls_za32_vg2x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z0, z23),
+ svmls_za32_vg2x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z23, z0),
+ svmls_za32_vg2x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z18, z28),
+ svmls_za32_vg2x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8, z28, z4),
+ svmls_za32_vg2x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 + 1, z4, z0),
+ svmls_za32_vg2x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** umlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 + 2, z4, z0),
+ svmls_za32_vg2x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** umlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 + 6, z4, z0),
+ svmls_za32_vg2x2 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 + 7, z4, z0),
+ svmls_za32_vg2x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 + 8, z4, z4),
+ svmls_za32_vg2x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x2_t,
+ svmls_za32_u16_vg2x2 (w8 - 1, z4, z0),
+ svmls_za32_vg2x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (0, z1, z0),
+ svmls_za32_vg2x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w0, z1, z0),
+ svmls_za32_vg2x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8, z1, z0),
+ svmls_za32_vg2x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8 + 1, z1, z0),
+ svmls_za32_vg2x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** umlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8 + 4, z20, z0),
+ svmls_za32_vg2x2 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** umlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8 + 6, z27, z0),
+ svmls_za32_vg2x2 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8 + 7, z1, z0),
+ svmls_za32_vg2x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8 + 8, z1, z0),
+ svmls_za32_vg2x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w0 - 1, z1, z0),
+ svmls_za32_vg2x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8, z0, z15),
+ svmls_za32_vg2x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** umlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svmls_single_za32_u16_vg2x2 (w8, z20, z16),
+ svmls_za32_vg2x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (0, z0, z0),
+ svmls_za32_vg2x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w0, z0, z0),
+ svmls_za32_vg2x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z0, z4),
+ svmls_za32_vg2x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z0, z18),
+ svmls_za32_vg2x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z18, z0),
+ svmls_za32_vg2x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z0, z23),
+ svmls_za32_vg2x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z23, z0),
+ svmls_za32_vg2x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z4, z28),
+ svmls_za32_vg2x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8, z28, z0),
+ svmls_za32_vg2x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 + 1, z4, z0),
+ svmls_za32_vg2x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** umlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 + 2, z4, z0),
+ svmls_za32_vg2x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w8p6_z4_z0:
+** umlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p6_z4_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 + 6, z4, z0),
+ svmls_za32_vg2x4 (w8 + 6, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 + 7, z4, z0),
+ svmls_za32_vg2x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 + 8, z4, z4),
+ svmls_za32_vg2x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x4_t,
+ svmls_za32_u16_vg2x4 (w8 - 1, z4, z0),
+ svmls_za32_vg2x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (0, z1, z0),
+ svmls_za32_vg2x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w0, z1, z0),
+ svmls_za32_vg2x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8, z1, z0),
+ svmls_za32_vg2x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8 + 1, z1, z0),
+ svmls_za32_vg2x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** umlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8 + 4, z20, z0),
+ svmls_za32_vg2x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** umlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8 + 6, z27, z0),
+ svmls_za32_vg2x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8 + 7, z1, z0),
+ svmls_za32_vg2x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8 + 8, z1, z0),
+ svmls_za32_vg2x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w0 - 1, z1, z0),
+ svmls_za32_vg2x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8, z0, z15),
+ svmls_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** umlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svmls_single_za32_u16_vg2x4 (w8, z20, z16),
+ svmls_za32_vg2x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (0, z0, z0),
+ svmls_za32_vg4x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svuint8_t,
+ svmls_za32_u8_vg4x1 (w0, z0, z3),
+ svmls_za32_vg4x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svuint8_t,
+ svmls_za32_u8_vg4x1 (w7, z0, z3),
+ svmls_za32_vg4x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** umlsll za\.s\[w8, 0:3\], z7\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8, z7, z3),
+ svmls_za32_vg4x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3\], z31\.b, \1\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8, z31, z16),
+ svmls_za32_vg4x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8 + 1, z0, z0),
+ svmls_za32_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w10p4_z23_z0:
+** umlsll za\.s\[w10, 4:7\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w10p4_z23_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w10 + 4, z23, z0),
+ svmls_za32_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w11 + 6, z23, z0),
+ svmls_za32_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w9p8_z7_z7:
+** umlsll za\.s\[w9, 8:11\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w9p8_z7_z7, svuint8_t,
+ svmls_za32_u8_vg4x1 (w9 + 8, z7, z7),
+ svmls_za32_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mls_w11p12_z23_z0:
+** umlsll za\.s\[w11, 12:15\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w11p12_z23_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w11 + 12, z23, z0),
+ svmls_za32_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8 + 14, z23, z0),
+ svmls_za32_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8 + 15, z7, z7),
+ svmls_za32_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8 + 16, z7, z7),
+ svmls_za32_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z16\.b, z0\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svuint8_t,
+ svmls_za32_u8_vg4x1 (w8 - 1, z16, z0),
+ svmls_za32_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svuint8_t,
+ svmls_za32_u8_vg4x1 (w12, z0, z3),
+ svmls_za32_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (0, z0, z0),
+ svmls_za32_vg4x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w0, z0, z0),
+ svmls_za32_vg4x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z0, z4),
+ svmls_za32_vg4x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z4, z18),
+ svmls_za32_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z0, z23),
+ svmls_za32_vg4x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z23, z0),
+ svmls_za32_vg4x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z18, z28),
+ svmls_za32_vg4x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8, z28, z4),
+ svmls_za32_vg4x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8 + 1, z4, z0),
+ svmls_za32_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8 + 2, z4, z0),
+ svmls_za32_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** umlsll za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w11 + 4, z4, z0),
+ svmls_za32_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8 + 7, z4, z0),
+ svmls_za32_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8 + 8, z4, z4),
+ svmls_za32_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint8x2_t,
+ svmls_za32_u8_vg4x2 (w8 - 1, z4, z0),
+ svmls_za32_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (0, z1, z0),
+ svmls_za32_vg4x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w0, z1, z0),
+ svmls_za32_vg4x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8, z1, z0),
+ svmls_za32_vg4x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8 + 1, z1, z0),
+ svmls_za32_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8 + 2, z20, z0),
+ svmls_za32_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mls_single_w11p4_z27_z0:
+** umlsll za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w11 + 4, z27, z0),
+ svmls_za32_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8 + 7, z1, z0),
+ svmls_za32_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8 + 8, z1, z0),
+ svmls_za32_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w0 - 1, z1, z0),
+ svmls_za32_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8, z0, z15),
+ svmls_za32_vg4x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint8x2_t, svuint8_t,
+ svmls_single_za32_u8_vg4x2 (w8, z20, z16),
+ svmls_za32_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (0, z0, z0),
+ svmls_za32_vg4x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w0, z0, z0),
+ svmls_za32_vg4x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z0, z4),
+ svmls_za32_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z0, z18),
+ svmls_za32_vg4x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z18, z0),
+ svmls_za32_vg4x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z0, z23),
+ svmls_za32_vg4x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z23, z0),
+ svmls_za32_vg4x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z4, z28),
+ svmls_za32_vg4x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8, z28, z0),
+ svmls_za32_vg4x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8 + 1, z4, z0),
+ svmls_za32_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8 + 2, z4, z0),
+ svmls_za32_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** umlsll za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w11 + 4, z4, z0),
+ svmls_za32_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8 + 7, z4, z0),
+ svmls_za32_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8 + 8, z4, z4),
+ svmls_za32_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint8x4_t,
+ svmls_za32_u8_vg4x4 (w8 - 1, z4, z0),
+ svmls_za32_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (0, z1, z0),
+ svmls_za32_vg4x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w0, z1, z0),
+ svmls_za32_vg4x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8, z1, z0),
+ svmls_za32_vg4x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8 + 1, z1, z0),
+ svmls_za32_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** umlsll za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8 + 4, z20, z0),
+ svmls_za32_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8 + 6, z27, z0),
+ svmls_za32_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8 + 7, z1, z0),
+ svmls_za32_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8 + 8, z1, z0),
+ svmls_za32_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w0 - 1, z1, z0),
+ svmls_za32_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8, z0, z15),
+ svmls_za32_vg4x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint8x4_t, svuint8_t,
+ svmls_single_za32_u8_vg4x4 (w8, z20, z16),
+ svmls_za32_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (0, z0, z0),
+ svmls_za64_vg1x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w0, z0, z0),
+ svmls_za64_vg1x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmls za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8, z0, z4),
+ svmls_za64_vg1x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8, z4, z18),
+ svmls_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z23_z0:
+** ...
+** fmls za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8, z23, z0),
+ svmls_za64_vg1x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z23:
+** ...
+** fmls za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z23, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8, z18, z23),
+ svmls_za64_vg1x2 (w8, z18, z23))
+
+/*
+** mls_w8_z4_z28:
+** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8, z4, z28),
+ svmls_za64_vg1x2 (w8, z4, z28))
+
+/*
+** mls_w8p7_z4_z0:
+** fmls za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8 + 7, z4, z0),
+ svmls_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8 + 8, z4, z4),
+ svmls_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmls za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat64x2_t,
+ svmls_za64_f64_vg1x2 (w8 - 1, z4, z0),
+ svmls_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (0, z1, z0),
+ svmls_za64_vg1x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w0, z1, z0),
+ svmls_za64_vg1x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmls za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w8, z1, z0),
+ svmls_za64_vg1x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** fmls za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w8 + 7, z1, z0),
+ svmls_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w8 + 8, z1, z0),
+ svmls_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w0 - 1, z1, z0),
+ svmls_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w8, z0, z15),
+ svmls_za64_vg1x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmls za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat64x2_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x2 (w8, z20, z16),
+ svmls_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (0, z0, z0),
+ svmls_za64_vg1x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w0, z0, z0),
+ svmls_za64_vg1x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8, z0, z4),
+ svmls_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8, z0, z18),
+ svmls_za64_vg1x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z28:
+** ...
+** fmls za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8, z18, z28),
+ svmls_za64_vg1x4 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z23:
+** ...
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z23, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8, z28, z23),
+ svmls_za64_vg1x4 (w8, z28, z23))
+
+/*
+** mls_w8p7_z4_z0:
+** fmls za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8 + 7, z4, z0),
+ svmls_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8 + 8, z4, z4),
+ svmls_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fmls za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat64x4_t,
+ svmls_za64_f64_vg1x4 (w8 - 1, z4, z0),
+ svmls_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (0, z1, z0),
+ svmls_za64_vg1x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w0, z1, z0),
+ svmls_za64_vg1x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** fmls za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w8, z1, z0),
+ svmls_za64_vg1x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** fmls za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w8 + 7, z1, z0),
+ svmls_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w8 + 8, z1, z0),
+ svmls_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w0 - 1, z1, z0),
+ svmls_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w8, z0, z15),
+ svmls_za64_vg1x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmls za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat64x4_t, svfloat64_t,
+ svmls_single_za64_f64_vg1x4 (w8, z20, z16),
+ svmls_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (0, z0, z0),
+ svmls_za64_vg4x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svint16_t,
+ svmls_za64_s16_vg4x1 (w0, z0, z3),
+ svmls_za64_vg4x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svint16_t,
+ svmls_za64_s16_vg4x1 (w7, z0, z3),
+ svmls_za64_vg4x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** smlsll za\.d\[w8, 0:3\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svint16_t,
+ svmls_za64_s16_vg4x1 (w8, z7, z3),
+ svmls_za64_vg4x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svint16_t,
+ svmls_za64_s16_vg4x1 (w8, z31, z16),
+ svmls_za64_vg4x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w8 + 1, z0, z0),
+ svmls_za64_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w10p4_z23_z0:
+** smlsll za\.d\[w10, 4:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w10p4_z23_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w10 + 4, z23, z0),
+ svmls_za64_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w11 + 6, z23, z0),
+ svmls_za64_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w9p8_z7_z7:
+** smlsll za\.d\[w9, 8:11\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w9p8_z7_z7, svint16_t,
+ svmls_za64_s16_vg4x1 (w9 + 8, z7, z7),
+ svmls_za64_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mls_w11p12_z23_z0:
+** smlsll za\.d\[w11, 12:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p12_z23_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w11 + 12, z23, z0),
+ svmls_za64_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w8 + 14, z23, z0),
+ svmls_za64_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svint16_t,
+ svmls_za64_s16_vg4x1 (w8 + 15, z7, z7),
+ svmls_za64_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svint16_t,
+ svmls_za64_s16_vg4x1 (w8 + 16, z7, z7),
+ svmls_za64_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svint16_t,
+ svmls_za64_s16_vg4x1 (w8 - 1, z16, z0),
+ svmls_za64_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svint16_t,
+ svmls_za64_s16_vg4x1 (w12, z0, z3),
+ svmls_za64_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (0, z0, z0),
+ svmls_za64_vg4x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w0, z0, z0),
+ svmls_za64_vg4x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z0, z4),
+ svmls_za64_vg4x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z4, z18),
+ svmls_za64_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z0, z23),
+ svmls_za64_vg4x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z23, z0),
+ svmls_za64_vg4x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z18, z28),
+ svmls_za64_vg4x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8, z28, z4),
+ svmls_za64_vg4x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8 + 1, z4, z0),
+ svmls_za64_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8 + 2, z4, z0),
+ svmls_za64_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** smlsll za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w11 + 4, z4, z0),
+ svmls_za64_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8 + 7, z4, z0),
+ svmls_za64_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8 + 8, z4, z4),
+ svmls_za64_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint16x2_t,
+ svmls_za64_s16_vg4x2 (w8 - 1, z4, z0),
+ svmls_za64_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (0, z1, z0),
+ svmls_za64_vg4x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w0, z1, z0),
+ svmls_za64_vg4x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8, z1, z0),
+ svmls_za64_vg4x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8 + 1, z1, z0),
+ svmls_za64_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8 + 2, z20, z0),
+ svmls_za64_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mls_single_w11p4_z27_z0:
+** smlsll za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w11 + 4, z27, z0),
+ svmls_za64_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8 + 7, z1, z0),
+ svmls_za64_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8 + 8, z1, z0),
+ svmls_za64_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w0 - 1, z1, z0),
+ svmls_za64_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8, z0, z15),
+ svmls_za64_vg4x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x2_t, svint16_t,
+ svmls_single_za64_s16_vg4x2 (w8, z20, z16),
+ svmls_za64_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (0, z0, z0),
+ svmls_za64_vg4x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w0, z0, z0),
+ svmls_za64_vg4x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z0, z4),
+ svmls_za64_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z0, z18),
+ svmls_za64_vg4x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z18, z0),
+ svmls_za64_vg4x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z0, z23),
+ svmls_za64_vg4x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z23, z0),
+ svmls_za64_vg4x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z4, z28),
+ svmls_za64_vg4x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8, z28, z0),
+ svmls_za64_vg4x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8 + 1, z4, z0),
+ svmls_za64_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8 + 2, z4, z0),
+ svmls_za64_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** smlsll za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w11 + 4, z4, z0),
+ svmls_za64_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8 + 7, z4, z0),
+ svmls_za64_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8 + 8, z4, z4),
+ svmls_za64_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svint16x4_t,
+ svmls_za64_s16_vg4x4 (w8 - 1, z4, z0),
+ svmls_za64_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (0, z1, z0),
+ svmls_za64_vg4x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w0, z1, z0),
+ svmls_za64_vg4x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8, z1, z0),
+ svmls_za64_vg4x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8 + 1, z1, z0),
+ svmls_za64_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** smlsll za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8 + 4, z20, z0),
+ svmls_za64_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8 + 6, z27, z0),
+ svmls_za64_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8 + 7, z1, z0),
+ svmls_za64_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8 + 8, z1, z0),
+ svmls_za64_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w0 - 1, z1, z0),
+ svmls_za64_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8, z0, z15),
+ svmls_za64_vg4x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x4_t, svint16_t,
+ svmls_single_za64_s16_vg4x4 (w8, z20, z16),
+ svmls_za64_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (0, z0, z0),
+ svmls_za64_vg4x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svuint16_t,
+ svmls_za64_u16_vg4x1 (w0, z0, z3),
+ svmls_za64_vg4x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svuint16_t,
+ svmls_za64_u16_vg4x1 (w7, z0, z3),
+ svmls_za64_vg4x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** umlsll za\.d\[w8, 0:3\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8, z7, z3),
+ svmls_za64_vg4x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3\], z31\.h, \1\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z31_z16, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8, z31, z16),
+ svmls_za64_vg4x1 (w8, z31, z16))
+
+/*
+** mls_w8p1_z0_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p1_z0_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8 + 1, z0, z0),
+ svmls_za64_vg4x1 (w8 + 1, z0, z0))
+
+/*
+** mls_w10p4_z23_z0:
+** umlsll za\.d\[w10, 4:7\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w10p4_z23_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w10 + 4, z23, z0),
+ svmls_za64_vg4x1 (w10 + 4, z23, z0))
+
+/*
+** mls_w11p6_z23_z0:
+** add (w8|w9|w10|w11), w11, #?6
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p6_z23_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w11 + 6, z23, z0),
+ svmls_za64_vg4x1 (w11 + 6, z23, z0))
+
+/*
+** mls_w9p8_z7_z7:
+** umlsll za\.d\[w9, 8:11\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w9p8_z7_z7, svuint16_t,
+ svmls_za64_u16_vg4x1 (w9 + 8, z7, z7),
+ svmls_za64_vg4x1 (w9 + 8, z7, z7))
+
+/*
+** mls_w11p12_z23_z0:
+** umlsll za\.d\[w11, 12:15\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w11p12_z23_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w11 + 12, z23, z0),
+ svmls_za64_vg4x1 (w11 + 12, z23, z0))
+
+/*
+** mls_w8p14_z23_z0:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p14_z23_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8 + 14, z23, z0),
+ svmls_za64_vg4x1 (w8 + 14, z23, z0))
+
+/*
+** mls_w8p15_z7_z7:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p15_z7_z7, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8 + 15, z7, z7),
+ svmls_za64_vg4x1 (w8 + 15, z7, z7))
+
+/*
+** mls_w8p16_z7_z7:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8p16_z7_z7, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8 + 16, z7, z7),
+ svmls_za64_vg4x1 (w8 + 16, z7, z7))
+
+/*
+** mls_w8m1_z16_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z16\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8m1_z16_z0, svuint16_t,
+ svmls_za64_u16_vg4x1 (w8 - 1, z16, z0),
+ svmls_za64_vg4x1 (w8 - 1, z16, z0))
+
+/*
+** mls_w12_z0_z3:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w12_z0_z3, svuint16_t,
+ svmls_za64_u16_vg4x1 (w12, z0, z3),
+ svmls_za64_vg4x1 (w12, z0, z3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (0, z0, z0),
+ svmls_za64_vg4x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w0, z0, z0),
+ svmls_za64_vg4x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z0, z4),
+ svmls_za64_vg4x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z4, z18),
+ svmls_za64_vg4x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z0, z23),
+ svmls_za64_vg4x2 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z23, z0),
+ svmls_za64_vg4x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z28:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z18, z28),
+ svmls_za64_vg4x2 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z4:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z4, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8, z28, z4),
+ svmls_za64_vg4x2 (w8, z28, z4))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8 + 1, z4, z0),
+ svmls_za64_vg4x2 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8 + 2, z4, z0),
+ svmls_za64_vg4x2 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** umlsll za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w11 + 4, z4, z0),
+ svmls_za64_vg4x2 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8 + 7, z4, z0),
+ svmls_za64_vg4x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8 + 8, z4, z4),
+ svmls_za64_vg4x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x2_t,
+ svmls_za64_u16_vg4x2 (w8 - 1, z4, z0),
+ svmls_za64_vg4x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (0, z1, z0),
+ svmls_za64_vg4x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w0, z1, z0),
+ svmls_za64_vg4x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8, z1, z0),
+ svmls_za64_vg4x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8 + 1, z1, z0),
+ svmls_za64_vg4x2 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p2_z20_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8 + 2, z20, z0),
+ svmls_za64_vg4x2 (w8 + 2, z20, z0))
+
+/*
+** mls_single_w11p4_z27_z0:
+** umlsll za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w11 + 4, z27, z0),
+ svmls_za64_vg4x2 (w11 + 4, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8 + 7, z1, z0),
+ svmls_za64_vg4x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8 + 8, z1, z0),
+ svmls_za64_vg4x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w0 - 1, z1, z0),
+ svmls_za64_vg4x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8, z0, z15),
+ svmls_za64_vg4x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x2_t, svuint16_t,
+ svmls_single_za64_u16_vg4x2 (w8, z20, z16),
+ svmls_za64_vg4x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (0, z0, z0),
+ svmls_za64_vg4x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w0, z0, z0),
+ svmls_za64_vg4x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z0, z4),
+ svmls_za64_vg4x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** mls_w8_z0_z18:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z0, z18),
+ svmls_za64_vg4x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z0:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z18_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z18, z0),
+ svmls_za64_vg4x4 (w8, z18, z0))
+
+/*
+** mls_w8_z0_z23:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (mls_w8_z0_z23, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z0, z23),
+ svmls_za64_vg4x4 (w8, z0, z23))
+
+/*
+** mls_w8_z23_z0:
+** ...
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z23, z0),
+ svmls_za64_vg4x4 (w8, z23, z0))
+
+/*
+** mls_w8_z4_z28:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z4, z28),
+ svmls_za64_vg4x4 (w8, z4, z28))
+
+/*
+** mls_w8_z28_z0:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8_z28_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8, z28, z0),
+ svmls_za64_vg4x4 (w8, z28, z0))
+
+/*
+** mls_w8p1_z4_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8 + 1, z4, z0),
+ svmls_za64_vg4x4 (w8 + 1, z4, z0))
+
+/*
+** mls_w8p2_z4_z0:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8 + 2, z4, z0),
+ svmls_za64_vg4x4 (w8 + 2, z4, z0))
+
+/*
+** mls_w11p4_z4_z0:
+** umlsll za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w11p4_z4_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w11 + 4, z4, z0),
+ svmls_za64_vg4x4 (w11 + 4, z4, z0))
+
+/*
+** mls_w8p7_z4_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8 + 7, z4, z0),
+ svmls_za64_vg4x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8 + 8, z4, z4),
+ svmls_za64_vg4x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x4_t,
+ svmls_za64_u16_vg4x4 (w8 - 1, z4, z0),
+ svmls_za64_vg4x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (0, z1, z0),
+ svmls_za64_vg4x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w0, z1, z0),
+ svmls_za64_vg4x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8, z1, z0),
+ svmls_za64_vg4x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p1_z1_z0:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8 + 1, z1, z0),
+ svmls_za64_vg4x4 (w8 + 1, z1, z0))
+
+/*
+** mls_single_w8p4_z20_z0:
+** umlsll za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8 + 4, z20, z0),
+ svmls_za64_vg4x4 (w8 + 4, z20, z0))
+
+/*
+** mls_single_w8p6_z27_z0:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8 + 6, z27, z0),
+ svmls_za64_vg4x4 (w8 + 6, z27, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8 + 7, z1, z0),
+ svmls_za64_vg4x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8 + 8, z1, z0),
+ svmls_za64_vg4x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w0 - 1, z1, z0),
+ svmls_za64_vg4x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8, z0, z15),
+ svmls_za64_vg4x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x4_t, svuint16_t,
+ svmls_single_za64_u16_vg4x4 (w8, z20, z16),
+ svmls_za64_vg4x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mopa_za32_s16_0_p0_p1_z0_z1:
+** smopa za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s16_0_p0_p1_z0_z1, svint16_t,
+ svmopa_za32_s16_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_s16_0_p1_p0_z1_z0:
+** smopa za0\.s, p1/m, p0/m, z1\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s16_0_p1_p0_z1_z0, svint16_t,
+ svmopa_za32_s16_m (0, p1, p0, z1, z0),
+ svmopa_za32_m (0, p1, p0, z1, z0))
+
+/*
+** mopa_za32_s16_3_p0_p1_z0_z1:
+** smopa za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s16_3_p0_p1_z0_z1, svint16_t,
+ svmopa_za32_s16_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mopa_za32_u16_0_p0_p1_z0_z1:
+** umopa za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_u16_0_p0_p1_z0_z1, svuint16_t,
+ svmopa_za32_u16_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_u16_3_p0_p1_z0_z1:
+** umopa za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_u16_3_p0_p1_z0_z1, svuint16_t,
+ svmopa_za32_u16_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mops_za32_s16_0_p0_p1_z0_z1:
+** smops za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s16_0_p0_p1_z0_z1, svint16_t,
+ svmops_za32_s16_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_s16_0_p1_p0_z1_z0:
+** smops za0\.s, p1/m, p0/m, z1\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s16_0_p1_p0_z1_z0, svint16_t,
+ svmops_za32_s16_m (0, p1, p0, z1, z0),
+ svmops_za32_m (0, p1, p0, z1, z0))
+
+/*
+** mops_za32_s16_3_p0_p1_z0_z1:
+** smops za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s16_3_p0_p1_z0_z1, svint16_t,
+ svmops_za32_s16_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mops_za32_u16_0_p0_p1_z0_z1:
+** umops za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_u16_0_p0_p1_z0_z1, svuint16_t,
+ svmops_za32_u16_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_u16_3_p0_p1_z0_z1:
+** umops za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_u16_3_p0_p1_z0_z1, svuint16_t,
+ svmops_za32_u16_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.h, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t,
+ p2 = svpext_c16 (pn0, 0),
+ p2 = svpext_c16 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.h, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t,
+ p5 = svpext_c16 (pn7, 1),
+ p5 = svpext_c16 (pn7, 1))
+
+/*
+** pext_p9_pn8_2:
+** pext p9\.h, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t,
+ p9 = svpext_c16 (pn8, 2),
+ p9 = svpext_c16 (pn8, 2))
+
+/*
+** pext_p12_pn11_3:
+** pext p12\.h, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t,
+ p12 = svpext_c16 (pn11, 3),
+ p12 = svpext_c16 (pn11, 3))
+
+/*
+** pext_p2_pn15_0:
+** pext p2\.h, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t,
+ p2 = svpext_c16 (pn15, 0),
+ p2 = svpext_c16 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.h, p3\.h}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t,
+ p2 = svpext_c16_x2 (pn0, 0),
+ p2 = svpext_c16_x2 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t,
+ p5 = svpext_c16_x2 (pn7, 1),
+ p5 = svpext_c16_x2 (pn7, 1))
+
+/*
+** pext_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t,
+ p9 = svpext_c16_x2 (pn8, 0),
+ p9 = svpext_c16_x2 (pn8, 0))
+
+/*
+** pext_p12_pn11_1:
+** pext {p12\.h, p13\.h}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t,
+ p12 = svpext_c16_x2 (pn11, 1),
+ p12 = svpext_c16_x2 (pn11, 1))
+
+/*
+** pext_p2_pn15_0:
+** pext {p2\.h, p3\.h}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t,
+ p2 = svpext_c16_x2 (pn15, 0),
+ p2 = svpext_c16_x2 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.s, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t,
+ p2 = svpext_c32 (pn0, 0),
+ p2 = svpext_c32 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.s, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t,
+ p5 = svpext_c32 (pn7, 1),
+ p5 = svpext_c32 (pn7, 1))
+
+/*
+** pext_p9_pn8_2:
+** pext p9\.s, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t,
+ p9 = svpext_c32 (pn8, 2),
+ p9 = svpext_c32 (pn8, 2))
+
+/*
+** pext_p12_pn11_3:
+** pext p12\.s, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t,
+ p12 = svpext_c32 (pn11, 3),
+ p12 = svpext_c32 (pn11, 3))
+
+/*
+** pext_p2_pn15_0:
+** pext p2\.s, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t,
+ p2 = svpext_c32 (pn15, 0),
+ p2 = svpext_c32 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.s, p3\.s}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t,
+ p2 = svpext_c32_x2 (pn0, 0),
+ p2 = svpext_c32_x2 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t,
+ p5 = svpext_c32_x2 (pn7, 1),
+ p5 = svpext_c32_x2 (pn7, 1))
+
+/*
+** pext_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t,
+ p9 = svpext_c32_x2 (pn8, 0),
+ p9 = svpext_c32_x2 (pn8, 0))
+
+/*
+** pext_p12_pn11_1:
+** pext {p12\.s, p13\.s}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t,
+ p12 = svpext_c32_x2 (pn11, 1),
+ p12 = svpext_c32_x2 (pn11, 1))
+
+/*
+** pext_p2_pn15_0:
+** pext {p2\.s, p3\.s}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t,
+ p2 = svpext_c32_x2 (pn15, 0),
+ p2 = svpext_c32_x2 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.d, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t,
+ p2 = svpext_c64 (pn0, 0),
+ p2 = svpext_c64 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.d, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t,
+ p5 = svpext_c64 (pn7, 1),
+ p5 = svpext_c64 (pn7, 1))
+
+/*
+** pext_p9_pn8_2:
+** pext p9\.d, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t,
+ p9 = svpext_c64 (pn8, 2),
+ p9 = svpext_c64 (pn8, 2))
+
+/*
+** pext_p12_pn11_3:
+** pext p12\.d, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t,
+ p12 = svpext_c64 (pn11, 3),
+ p12 = svpext_c64 (pn11, 3))
+
+/*
+** pext_p2_pn15_0:
+** pext p2\.d, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t,
+ p2 = svpext_c64 (pn15, 0),
+ p2 = svpext_c64 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.d, p3\.d}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t,
+ p2 = svpext_c64_x2 (pn0, 0),
+ p2 = svpext_c64_x2 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t,
+ p5 = svpext_c64_x2 (pn7, 1),
+ p5 = svpext_c64_x2 (pn7, 1))
+
+/*
+** pext_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t,
+ p9 = svpext_c64_x2 (pn8, 0),
+ p9 = svpext_c64_x2 (pn8, 0))
+
+/*
+** pext_p12_pn11_1:
+** pext {p12\.d, p13\.d}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t,
+ p12 = svpext_c64_x2 (pn11, 1),
+ p12 = svpext_c64_x2 (pn11, 1))
+
+/*
+** pext_p2_pn15_0:
+** pext {p2\.d, p3\.d}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t,
+ p2 = svpext_c64_x2 (pn15, 0),
+ p2 = svpext_c64_x2 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.b, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t,
+ p2 = svpext_c8 (pn0, 0),
+ p2 = svpext_c8 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.b, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t,
+ p5 = svpext_c8 (pn7, 1),
+ p5 = svpext_c8 (pn7, 1))
+
+/*
+** pext_p9_pn8_2:
+** pext p9\.b, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t,
+ p9 = svpext_c8 (pn8, 2),
+ p9 = svpext_c8 (pn8, 2))
+
+/*
+** pext_p12_pn11_3:
+** pext p12\.b, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t,
+ p12 = svpext_c8 (pn11, 3),
+ p12 = svpext_c8 (pn11, 3))
+
+/*
+** pext_p2_pn15_0:
+** pext p2\.b, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t,
+ p2 = svpext_c8 (pn15, 0),
+ p2 = svpext_c8 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pext_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.b, p3\.b}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t,
+ p2 = svpext_c8_x2 (pn0, 0),
+ p2 = svpext_c8_x2 (pn0, 0))
+
+/*
+** pext_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t,
+ p5 = svpext_c8_x2 (pn7, 1),
+ p5 = svpext_c8_x2 (pn7, 1))
+
+/*
+** pext_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t,
+ p9 = svpext_c8_x2 (pn8, 0),
+ p9 = svpext_c8_x2 (pn8, 0))
+
+/*
+** pext_p12_pn11_1:
+** pext {p12\.b, p13\.b}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t,
+ p12 = svpext_c8_x2 (pn11, 1),
+ p12 = svpext_c8_x2 (pn11, 1))
+
+/*
+** pext_p2_pn15_0:
+** pext {p2\.b, p3\.b}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t,
+ p2 = svpext_c8_x2 (pn15, 0),
+ p2 = svpext_c8_x2 (pn15, 0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** pfalse_pn0:
+** pfalse p0\.b
+** ret
+*/
+TEST_PN (pfalse_pn0,
+ pn0 = svpfalse_c (),
+ pn0 = svpfalse_c ())
+
+/*
+** pfalse_pn7:
+** pfalse p7\.b
+** ret
+*/
+TEST_PN (pfalse_pn7,
+ pn7 = svpfalse_c (),
+ pn7 = svpfalse_c ())
+
+/*
+** pfalse_pn8:
+** pfalse p8\.b
+** ret
+*/
+TEST_PN (pfalse_pn8,
+ pn8 = svpfalse_c (),
+ pn8 = svpfalse_c ())
+
+/*
+** pfalse_pn15:
+** pfalse p15\.b
+** ret
+*/
+TEST_PN (pfalse_pn15,
+ pn15 = svpfalse_c (),
+ pn15 = svpfalse_c ())
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t,
+ p0 = svpsel_b16 (p2, p7, 0),
+ p0 = svpsel_b16 (p2, p7, 0))
+
+/*
+** psel_p2_p7_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p7, p8\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t,
+ p2 = svpsel_b16 (p7, p8, w11),
+ p2 = svpsel_b16 (p7, p8, w11))
+
+/*
+** psel_p7_p8_p13_w12:
+** psel p7, p8, p13\.h\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t,
+ p7 = svpsel_b16 (p8, p13, w12),
+ p7 = svpsel_b16 (p8, p13, w12))
+
+/*
+** psel_p8_p13_p15_w15:
+** psel p8, p13, p15\.h\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t,
+ p8 = svpsel_b16 (p13, p15, w15),
+ p8 = svpsel_b16 (p13, p15, w15))
+
+/*
+** psel_p13_p15_p0_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p15, p0\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t,
+ p13 = svpsel_b16 (p15, p0, w16),
+ p13 = svpsel_b16 (p15, p0, w16))
+
+/*
+** psel_p15_p13_p8_w12p1:
+** psel p15, p13, p8\.h\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t,
+ p15 = svpsel_b16 (p13, p8, w12 + 1),
+ p15 = svpsel_b16 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p8_p7_w12p7:
+** psel p13, p8, p7\.h\[w12, 7\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p8_p7_w12p7, svbool_t,
+ p13 = svpsel_b16 (p8, p7, w12 + 7),
+ p13 = svpsel_b16 (p8, p7, w12 + 7))
+
+/*
+** psel_p0_p0_p0_w12p8:
+** add (w[0-9]+), w12, #?8
+** psel p0, p0, p0\.h\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p0_w12p8, svbool_t,
+ p0 = svpsel_b16 (p0, p0, w12 + 8),
+ p0 = svpsel_b16 (p0, p0, w12 + 8))
+
+/*
+** psel_p15_p15_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p15, p15, p15\.h\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t,
+ p15 = svpsel_b16 (p15, p15, w12 - 1),
+ p15 = svpsel_b16 (p15, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t,
+ p0 = svpsel_b32 (p2, p7, 0),
+ p0 = svpsel_b32 (p2, p7, 0))
+
+/*
+** psel_p2_p7_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p7, p8\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t,
+ p2 = svpsel_b32 (p7, p8, w11),
+ p2 = svpsel_b32 (p7, p8, w11))
+
+/*
+** psel_p7_p8_p13_w12:
+** psel p7, p8, p13\.s\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t,
+ p7 = svpsel_b32 (p8, p13, w12),
+ p7 = svpsel_b32 (p8, p13, w12))
+
+/*
+** psel_p8_p13_p15_w15:
+** psel p8, p13, p15\.s\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t,
+ p8 = svpsel_b32 (p13, p15, w15),
+ p8 = svpsel_b32 (p13, p15, w15))
+
+/*
+** psel_p13_p15_p0_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p15, p0\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t,
+ p13 = svpsel_b32 (p15, p0, w16),
+ p13 = svpsel_b32 (p15, p0, w16))
+
+/*
+** psel_p15_p13_p8_w12p1:
+** psel p15, p13, p8\.s\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t,
+ p15 = svpsel_b32 (p13, p8, w12 + 1),
+ p15 = svpsel_b32 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p8_p7_w12p3:
+** psel p13, p8, p7\.s\[w12, 3\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p8_p7_w12p3, svbool_t,
+ p13 = svpsel_b32 (p8, p7, w12 + 3),
+ p13 = svpsel_b32 (p8, p7, w12 + 3))
+
+/*
+** psel_p0_p0_p0_w12p4:
+** add (w[0-9]+), w12, #?4
+** psel p0, p0, p0\.s\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p0_w12p4, svbool_t,
+ p0 = svpsel_b32 (p0, p0, w12 + 4),
+ p0 = svpsel_b32 (p0, p0, w12 + 4))
+
+/*
+** psel_p15_p15_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p15, p15, p15\.s\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t,
+ p15 = svpsel_b32 (p15, p15, w12 - 1),
+ p15 = svpsel_b32 (p15, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t,
+ p0 = svpsel_b64 (p2, p7, 0),
+ p0 = svpsel_b64 (p2, p7, 0))
+
+/*
+** psel_p2_p7_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p7, p8\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t,
+ p2 = svpsel_b64 (p7, p8, w11),
+ p2 = svpsel_b64 (p7, p8, w11))
+
+/*
+** psel_p7_p8_p13_w12:
+** psel p7, p8, p13\.d\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t,
+ p7 = svpsel_b64 (p8, p13, w12),
+ p7 = svpsel_b64 (p8, p13, w12))
+
+/*
+** psel_p8_p13_p15_w15:
+** psel p8, p13, p15\.d\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t,
+ p8 = svpsel_b64 (p13, p15, w15),
+ p8 = svpsel_b64 (p13, p15, w15))
+
+/*
+** psel_p13_p15_p0_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p15, p0\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t,
+ p13 = svpsel_b64 (p15, p0, w16),
+ p13 = svpsel_b64 (p15, p0, w16))
+
+/*
+** psel_p15_p13_p8_w12p1:
+** psel p15, p13, p8\.d\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t,
+ p15 = svpsel_b64 (p13, p8, w12 + 1),
+ p15 = svpsel_b64 (p13, p8, w12 + 1))
+
+/*
+** psel_p0_p0_p0_w12p2:
+** add (w[0-9]+), w12, #?2
+** psel p0, p0, p0\.d\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p0_w12p2, svbool_t,
+ p0 = svpsel_b64 (p0, p0, w12 + 2),
+ p0 = svpsel_b64 (p0, p0, w12 + 2))
+
+/*
+** psel_p15_p15_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p15, p15, p15\.d\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t,
+ p15 = svpsel_b64 (p15, p15, w12 - 1),
+ p15 = svpsel_b64 (p15, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t,
+ p0 = svpsel_b8 (p2, p7, 0),
+ p0 = svpsel_b8 (p2, p7, 0))
+
+/*
+** psel_p2_p7_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p7, p8\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t,
+ p2 = svpsel_b8 (p7, p8, w11),
+ p2 = svpsel_b8 (p7, p8, w11))
+
+/*
+** psel_p7_p8_p13_w12:
+** psel p7, p8, p13\.b\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t,
+ p7 = svpsel_b8 (p8, p13, w12),
+ p7 = svpsel_b8 (p8, p13, w12))
+
+/*
+** psel_p8_p13_p15_w15:
+** psel p8, p13, p15\.b\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t,
+ p8 = svpsel_b8 (p13, p15, w15),
+ p8 = svpsel_b8 (p13, p15, w15))
+
+/*
+** psel_p13_p15_p0_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p15, p0\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t,
+ p13 = svpsel_b8 (p15, p0, w16),
+ p13 = svpsel_b8 (p15, p0, w16))
+
+/*
+** psel_p15_p13_p8_w12p1:
+** psel p15, p13, p8\.b\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t,
+ p15 = svpsel_b8 (p13, p8, w12 + 1),
+ p15 = svpsel_b8 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p8_p7_w12p15:
+** psel p13, p8, p7\.b\[w12, 15\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p8_p7_w12p15, svbool_t,
+ p13 = svpsel_b8 (p8, p7, w12 + 15),
+ p13 = svpsel_b8 (p8, p7, w12 + 15))
+
+/*
+** psel_p0_p0_p0_w12p16:
+** add (w[0-9]+), w12, #?16
+** psel p0, p0, p0\.b\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p0_w12p16, svbool_t,
+ p0 = svpsel_b8 (p0, p0, w12 + 16),
+ p0 = svpsel_b8 (p0, p0, w12 + 16))
+
+/*
+** psel_p15_p15_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p15, p15, p15\.b\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t,
+ p15 = svpsel_b8 (p15, p15, w12 - 1),
+ p15 = svpsel_b8 (p15, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t,
+ p0 = svpsel_c16 (p2, p7, 0),
+ p0 = svpsel_c16 (p2, p7, 0))
+
+/*
+** psel_p2_p0_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p0, p8\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p0_p8_w11, svcount_t,
+ p2 = svpsel_c16 (p0, p8, w11),
+ p2 = svpsel_c16 (p0, p8, w11))
+
+/*
+** psel_p2_p13_p15_w12:
+** psel p2, p13, p15\.h\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p15_w12, svcount_t,
+ p2 = svpsel_c16 (p13, p15, w12),
+ p2 = svpsel_c16 (p13, p15, w12))
+
+/*
+** psel_p0_p13_p15_w15:
+** psel p0, p13, p15\.h\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p13_p15_w15, svcount_t,
+ p0 = svpsel_c16 (p13, p15, w15),
+ p0 = svpsel_c16 (p13, p15, w15))
+
+/*
+** psel_p13_p0_p15_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p0, p15\.h\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p15_w16, svcount_t,
+ p13 = svpsel_c16 (p0, p15, w16),
+ p13 = svpsel_c16 (p0, p15, w16))
+
+/*
+** psel_p2_p13_p8_w12p1:
+** psel p2, p13, p8\.h\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t,
+ p2 = svpsel_c16 (p13, p8, w12 + 1),
+ p2 = svpsel_c16 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p0_p7_w12p7:
+** psel p13, p0, p7\.h\[w12, 7\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p7_w12p7, svcount_t,
+ p13 = svpsel_c16 (p0, p7, w12 + 7),
+ p13 = svpsel_c16 (p0, p7, w12 + 7))
+
+/*
+** psel_p0_p0_p15_w12p8:
+** add (w[0-9]+), w12, #?8
+** psel p0, p0, p15\.h\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p15_w12p8, svcount_t,
+ p0 = svpsel_c16 (p0, p15, w12 + 8),
+ p0 = svpsel_c16 (p0, p15, w12 + 8))
+
+/*
+** psel_p13_p13_p7_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p13, p13, p7\.h\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p13_p7_w12m1, svcount_t,
+ p13 = svpsel_c16 (p13, p7, w12 - 1),
+ p13 = svpsel_c16 (p13, p7, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t,
+ p0 = svpsel_c32 (p2, p7, 0),
+ p0 = svpsel_c32 (p2, p7, 0))
+
+/*
+** psel_p2_p13_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p13, p8\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p8_w11, svcount_t,
+ p2 = svpsel_c32 (p13, p8, w11),
+ p2 = svpsel_c32 (p13, p8, w11))
+
+/*
+** psel_p0_p13_p15_w12:
+** psel p0, p13, p15\.s\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p13_p15_w12, svcount_t,
+ p0 = svpsel_c32 (p13, p15, w12),
+ p0 = svpsel_c32 (p13, p15, w12))
+
+/*
+** psel_p2_p0_p15_w15:
+** psel p2, p0, p15\.s\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p0_p15_w15, svcount_t,
+ p2 = svpsel_c32 (p0, p15, w15),
+ p2 = svpsel_c32 (p0, p15, w15))
+
+/*
+** psel_p13_p0_p7_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p0, p7\.s\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p7_w16, svcount_t,
+ p13 = svpsel_c32 (p0, p7, w16),
+ p13 = svpsel_c32 (p0, p7, w16))
+
+/*
+** psel_p2_p13_p8_w12p1:
+** psel p2, p13, p8\.s\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t,
+ p2 = svpsel_c32 (p13, p8, w12 + 1),
+ p2 = svpsel_c32 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p0_p7_w12p3:
+** psel p13, p0, p7\.s\[w12, 3\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p7_w12p3, svcount_t,
+ p13 = svpsel_c32 (p0, p7, w12 + 3),
+ p13 = svpsel_c32 (p0, p7, w12 + 3))
+
+/*
+** psel_p0_p0_p7_w12p4:
+** add (w[0-9]+), w12, #?4
+** psel p0, p0, p7\.s\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p7_w12p4, svcount_t,
+ p0 = svpsel_c32 (p0, p7, w12 + 4),
+ p0 = svpsel_c32 (p0, p7, w12 + 4))
+
+/*
+** psel_p13_p13_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p13, p13, p15\.s\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t,
+ p13 = svpsel_c32 (p13, p15, w12 - 1),
+ p13 = svpsel_c32 (p13, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t,
+ p0 = svpsel_c64 (p2, p7, 0),
+ p0 = svpsel_c64 (p2, p7, 0))
+
+/*
+** psel_p2_p13_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p13, p8\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p8_w11, svcount_t,
+ p2 = svpsel_c64 (p13, p8, w11),
+ p2 = svpsel_c64 (p13, p8, w11))
+
+/*
+** psel_p2_p0_p15_w12:
+** psel p2, p0, p15\.d\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p0_p15_w12, svcount_t,
+ p2 = svpsel_c64 (p0, p15, w12),
+ p2 = svpsel_c64 (p0, p15, w12))
+
+/*
+** psel_p0_p13_p15_w15:
+** psel p0, p13, p15\.d\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p13_p15_w15, svcount_t,
+ p0 = svpsel_c64 (p13, p15, w15),
+ p0 = svpsel_c64 (p13, p15, w15))
+
+/*
+** psel_p13_p0_p15_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p13, p0, p15\.d\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p15_w16, svcount_t,
+ p13 = svpsel_c64 (p0, p15, w16),
+ p13 = svpsel_c64 (p0, p15, w16))
+
+/*
+** psel_p2_p13_p8_w12p1:
+** psel p2, p13, p8\.d\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t,
+ p2 = svpsel_c64 (p13, p8, w12 + 1),
+ p2 = svpsel_c64 (p13, p8, w12 + 1))
+
+/*
+** psel_p0_p0_p8_w12p2:
+** add (w[0-9]+), w12, #?2
+** psel p0, p0, p8\.d\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p8_w12p2, svcount_t,
+ p0 = svpsel_c64 (p0, p8, w12 + 2),
+ p0 = svpsel_c64 (p0, p8, w12 + 2))
+
+/*
+** psel_p13_p13_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p13, p13, p15\.d\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t,
+ p13 = svpsel_c64 (p13, p15, w12 - 1),
+ p13 = svpsel_c64 (p13, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** psel_p0_p2_p7_0:
+** mov [wx](1[2-5]), #?0
+** psel p0, p2, p7\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t,
+ p0 = svpsel_c8 (p2, p7, 0),
+ p0 = svpsel_c8 (p2, p7, 0))
+
+/*
+** psel_p2_p0_p8_w11:
+** mov [wx](1[2-5]), [wx]11
+** psel p2, p0, p8\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p0_p8_w11, svcount_t,
+ p2 = svpsel_c8 (p0, p8, w11),
+ p2 = svpsel_c8 (p0, p8, w11))
+
+/*
+** psel_p0_p13_p15_w12:
+** psel p0, p13, p15\.b\[w12, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p13_p15_w12, svcount_t,
+ p0 = svpsel_c8 (p13, p15, w12),
+ p0 = svpsel_c8 (p13, p15, w12))
+
+/*
+** psel_p13_p0_p8_w15:
+** psel p13, p0, p8\.b\[w15, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p0_p8_w15, svcount_t,
+ p13 = svpsel_c8 (p0, p8, w15),
+ p13 = svpsel_c8 (p0, p8, w15))
+
+/*
+** psel_p2_p13_p7_w16:
+** mov [wx](1[2-5]), [wx]16
+** psel p2, p13, p7\.b\[w\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p2_p13_p7_w16, svcount_t,
+ p2 = svpsel_c8 (p13, p7, w16),
+ p2 = svpsel_c8 (p13, p7, w16))
+
+/*
+** psel_p0_p13_p8_w12p1:
+** psel p0, p13, p8\.b\[w12, 1\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p13_p8_w12p1, svcount_t,
+ p0 = svpsel_c8 (p13, p8, w12 + 1),
+ p0 = svpsel_c8 (p13, p8, w12 + 1))
+
+/*
+** psel_p13_p2_p7_w12p15:
+** psel p13, p2, p7\.b\[w12, 15\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p2_p7_w12p15, svcount_t,
+ p13 = svpsel_c8 (p2, p7, w12 + 15),
+ p13 = svpsel_c8 (p2, p7, w12 + 15))
+
+/*
+** psel_p0_p0_p15_w12p16:
+** add (w[0-9]+), w12, #?16
+** psel p0, p0, p15\.b\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p0_p0_p15_w12p16, svcount_t,
+ p0 = svpsel_c8 (p0, p15, w12 + 16),
+ p0 = svpsel_c8 (p0, p15, w12 + 16))
+
+/*
+** psel_p13_p13_p15_w12m1:
+** sub (w[0-9]+), w12, #?1
+** psel p13, p13, p15\.b\[\1, 0\]
+** ret
+*/
+TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t,
+ p13 = svpsel_c8 (p13, p15, w12 - 1),
+ p13 = svpsel_c8 (p13, p15, w12 - 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c16 (),
+ pn0 = svptrue_c16 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c16 (),
+ pn7 = svptrue_c16 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.h
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c16 (),
+ pn8 = svptrue_c16 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.h
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c16 (),
+ pn15 = svptrue_c16 ())
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c32 (),
+ pn0 = svptrue_c32 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c32 (),
+ pn7 = svptrue_c32 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.s
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c32 (),
+ pn8 = svptrue_c32 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.s
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c32 (),
+ pn15 = svptrue_c32 ())
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c64 (),
+ pn0 = svptrue_c64 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c64 (),
+ pn7 = svptrue_c64 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.d
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c64 (),
+ pn8 = svptrue_c64 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.d
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c64 (),
+ pn15 = svptrue_c64 ())
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c8 (),
+ pn0 = svptrue_c8 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c8 (),
+ pn7 = svptrue_c8 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.b
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c8 (),
+ pn8 = svptrue_c8 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.b
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c8 (),
+ pn15 = svptrue_c8 ())
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvt z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z0, svint32x2_t, svint16_t,
+ z0_res = svqcvt_s16_s32_x2 (z0),
+ z0_res = svqcvt_s16 (z0))
+
+/*
+** qcvt_z0_z6:
+** sqcvt z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z6, svint32x2_t, svint16_t,
+ z0_res = svqcvt_s16_s32_x2 (z6),
+ z0_res = svqcvt_s16 (z6))
+
+/*
+** qcvt_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z29, svint32x2_t, svint16_t,
+ z0_res = svqcvt_s16_s32_x2 (z29),
+ z0_res = svqcvt_s16 (z29))
+
+/*
+** qcvt_z5_z0:
+** sqcvt z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z5_z0, svint32x2_t, svint16_t,
+ z5 = svqcvt_s16_s32_x2 (z0),
+ z5 = svqcvt_s16 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvt z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z22_z16, svint32x2_t, svint16_t,
+ z22 = svqcvt_s16_s32_x2 (z16),
+ z22 = svqcvt_s16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvt z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svint64x4_t, svint16_t,
+ z0_res = svqcvt_s16_s64_x4 (z0),
+ z0_res = svqcvt_s16 (z0))
+
+/*
+** qcvt_z0_z4:
+** sqcvt z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svint64x4_t, svint16_t,
+ z0_res = svqcvt_s16_s64_x4 (z4),
+ z0_res = svqcvt_s16 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svint64x4_t, svint16_t,
+ z0_res = svqcvt_s16_s64_x4 (z21),
+ z0_res = svqcvt_s16 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvt z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svint64x4_t, svint16_t,
+ z25 = svqcvt_s16_s64_x4 (z26),
+ z25 = svqcvt_s16 (z26))
+
+/*
+** qcvt_z25_z0:
+** sqcvt z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svint64x4_t, svint16_t,
+ z25 = svqcvt_s16_s64_x4 (z0),
+ z25 = svqcvt_s16 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvt z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svint64x4_t, svint16_t,
+ z22_res = svqcvt_s16_s64_x4 (z16),
+ z22_res = svqcvt_s16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvt z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svint32x4_t, svint8_t,
+ z0_res = svqcvt_s8_s32_x4 (z0),
+ z0_res = svqcvt_s8 (z0))
+
+/*
+** qcvt_z0_z4:
+** sqcvt z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svint32x4_t, svint8_t,
+ z0_res = svqcvt_s8_s32_x4 (z4),
+ z0_res = svqcvt_s8 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvt z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svint32x4_t, svint8_t,
+ z0_res = svqcvt_s8_s32_x4 (z21),
+ z0_res = svqcvt_s8 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvt z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svint32x4_t, svint8_t,
+ z25 = svqcvt_s8_s32_x4 (z26),
+ z25 = svqcvt_s8 (z26))
+
+/*
+** qcvt_z25_z0:
+** sqcvt z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svint32x4_t, svint8_t,
+ z25 = svqcvt_s8_s32_x4 (z0),
+ z25 = svqcvt_s8 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvt z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svint32x4_t, svint8_t,
+ z22_res = svqcvt_s8_s32_x4 (z16),
+ z22_res = svqcvt_s8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvtu z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z0, svint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_s32_x2 (z0),
+ z0_res = svqcvt_u16 (z0))
+
+/*
+** qcvt_z0_z6:
+** sqcvtu z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z6, svint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_s32_x2 (z6),
+ z0_res = svqcvt_u16 (z6))
+
+/*
+** qcvt_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtu z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z29, svint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_s32_x2 (z29),
+ z0_res = svqcvt_u16 (z29))
+
+/*
+** qcvt_z5_z0:
+** sqcvtu z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z5_z0, svint32x2_t, svuint16_t,
+ z5 = svqcvt_u16_s32_x2 (z0),
+ z5 = svqcvt_u16 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvtu z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z22_z16, svint32x2_t, svuint16_t,
+ z22 = svqcvt_u16_s32_x2 (z16),
+ z22 = svqcvt_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvtu z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_s64_x4 (z0),
+ z0_res = svqcvt_u16 (z0))
+
+/*
+** qcvt_z0_z4:
+** sqcvtu z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_s64_x4 (z4),
+ z0_res = svqcvt_u16 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtu z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_s64_x4 (z21),
+ z0_res = svqcvt_u16 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtu z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svint64x4_t, svuint16_t,
+ z25 = svqcvt_u16_s64_x4 (z26),
+ z25 = svqcvt_u16 (z26))
+
+/*
+** qcvt_z25_z0:
+** sqcvtu z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svint64x4_t, svuint16_t,
+ z25 = svqcvt_u16_s64_x4 (z0),
+ z25 = svqcvt_u16 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvtu z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svint64x4_t, svuint16_t,
+ z22_res = svqcvt_u16_s64_x4 (z16),
+ z22_res = svqcvt_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** uqcvt z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z0, svuint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_u32_x2 (z0),
+ z0_res = svqcvt_u16 (z0))
+
+/*
+** qcvt_z0_z6:
+** uqcvt z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z6, svuint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_u32_x2 (z6),
+ z0_res = svqcvt_u16 (z6))
+
+/*
+** qcvt_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** uqcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvt_z0_z29, svuint32x2_t, svuint16_t,
+ z0_res = svqcvt_u16_u32_x2 (z29),
+ z0_res = svqcvt_u16 (z29))
+
+/*
+** qcvt_z5_z0:
+** uqcvt z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z5_z0, svuint32x2_t, svuint16_t,
+ z5 = svqcvt_u16_u32_x2 (z0),
+ z5 = svqcvt_u16 (z0))
+
+/*
+** qcvt_z22_z16:
+** uqcvt z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvt_z22_z16, svuint32x2_t, svuint16_t,
+ z22 = svqcvt_u16_u32_x2 (z16),
+ z22 = svqcvt_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** uqcvt z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svuint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_u64_x4 (z0),
+ z0_res = svqcvt_u16 (z0))
+
+/*
+** qcvt_z0_z4:
+** uqcvt z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svuint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_u64_x4 (z4),
+ z0_res = svqcvt_u16 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvt z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svuint64x4_t, svuint16_t,
+ z0_res = svqcvt_u16_u64_x4 (z21),
+ z0_res = svqcvt_u16 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvt z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svuint64x4_t, svuint16_t,
+ z25 = svqcvt_u16_u64_x4 (z26),
+ z25 = svqcvt_u16 (z26))
+
+/*
+** qcvt_z25_z0:
+** uqcvt z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svuint64x4_t, svuint16_t,
+ z25 = svqcvt_u16_u64_x4 (z0),
+ z25 = svqcvt_u16 (z0))
+
+/*
+** qcvt_z22_z16:
+** uqcvt z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svuint64x4_t, svuint16_t,
+ z22_res = svqcvt_u16_u64_x4 (z16),
+ z22_res = svqcvt_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** sqcvtu z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_s32_x4 (z0),
+ z0_res = svqcvt_u8 (z0))
+
+/*
+** qcvt_z0_z4:
+** sqcvtu z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_s32_x4 (z4),
+ z0_res = svqcvt_u8 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtu z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_s32_x4 (z21),
+ z0_res = svqcvt_u8 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtu z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svint32x4_t, svuint8_t,
+ z25 = svqcvt_u8_s32_x4 (z26),
+ z25 = svqcvt_u8 (z26))
+
+/*
+** qcvt_z25_z0:
+** sqcvtu z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svint32x4_t, svuint8_t,
+ z25 = svqcvt_u8_s32_x4 (z0),
+ z25 = svqcvt_u8 (z0))
+
+/*
+** qcvt_z22_z16:
+** sqcvtu z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svint32x4_t, svuint8_t,
+ z22_res = svqcvt_u8_s32_x4 (z16),
+ z22_res = svqcvt_u8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvt_z0_z0:
+** uqcvt z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z0, svuint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_u32_x4 (z0),
+ z0_res = svqcvt_u8 (z0))
+
+/*
+** qcvt_z0_z4:
+** uqcvt z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z4, svuint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_u32_x4 (z4),
+ z0_res = svqcvt_u8 (z4))
+
+/*
+** qcvt_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvt z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z0_z21, svuint32x4_t, svuint8_t,
+ z0_res = svqcvt_u8_u32_x4 (z21),
+ z0_res = svqcvt_u8 (z21))
+
+/*
+** qcvt_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvt z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z26, svuint32x4_t, svuint8_t,
+ z25 = svqcvt_u8_u32_x4 (z26),
+ z25 = svqcvt_u8 (z26))
+
+/*
+** qcvt_z25_z0:
+** uqcvt z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z25_z0, svuint32x4_t, svuint8_t,
+ z25 = svqcvt_u8_u32_x4 (z0),
+ z25 = svqcvt_u8 (z0))
+
+/*
+** qcvt_z22_z16:
+** uqcvt z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvt_z22_z16, svuint32x4_t, svuint8_t,
+ z22_res = svqcvt_u8_u32_x4 (z16),
+ z22_res = svqcvt_u8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z0),
+ z0_res = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z6),
+ z0_res = svqcvtn_s16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z29),
+ z0_res = svqcvtn_s16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svint16_t,
+ z5 = svqcvtn_s16_s32_x2 (z0),
+ z5 = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svint16_t,
+ z22 = svqcvtn_s16_s32_x2 (z16),
+ z22 = svqcvtn_s16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtn z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svint64x4_t, svint16_t,
+ z0_res = svqcvtn_s16_s64_x4 (z0),
+ z0_res = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z0_z4:
+** sqcvtn z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svint64x4_t, svint16_t,
+ z0_res = svqcvtn_s16_s64_x4 (z4),
+ z0_res = svqcvtn_s16 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svint64x4_t, svint16_t,
+ z0_res = svqcvtn_s16_s64_x4 (z21),
+ z0_res = svqcvtn_s16 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svint64x4_t, svint16_t,
+ z25 = svqcvtn_s16_s64_x4 (z26),
+ z25 = svqcvtn_s16 (z26))
+
+/*
+** qcvtn_z25_z0:
+** sqcvtn z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svint64x4_t, svint16_t,
+ z25 = svqcvtn_s16_s64_x4 (z0),
+ z25 = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtn z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svint64x4_t, svint16_t,
+ z22_res = svqcvtn_s16_s64_x4 (z16),
+ z22_res = svqcvtn_s16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtn z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svint32x4_t, svint8_t,
+ z0_res = svqcvtn_s8_s32_x4 (z0),
+ z0_res = svqcvtn_s8 (z0))
+
+/*
+** qcvtn_z0_z4:
+** sqcvtn z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svint32x4_t, svint8_t,
+ z0_res = svqcvtn_s8_s32_x4 (z4),
+ z0_res = svqcvtn_s8 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svint32x4_t, svint8_t,
+ z0_res = svqcvtn_s8_s32_x4 (z21),
+ z0_res = svqcvtn_s8 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svint32x4_t, svint8_t,
+ z25 = svqcvtn_s8_s32_x4 (z26),
+ z25 = svqcvtn_s8 (z26))
+
+/*
+** qcvtn_z25_z0:
+** sqcvtn z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svint32x4_t, svint8_t,
+ z25 = svqcvtn_s8_s32_x4 (z0),
+ z25 = svqcvtn_s8 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtn z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svint32x4_t, svint8_t,
+ z22_res = svqcvtn_s8_s32_x4 (z16),
+ z22_res = svqcvtn_s8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtun z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtun z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtun z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_s32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtun z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_s32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtun z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_s64_x4 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z4:
+** sqcvtun z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_s64_x4 (z4),
+ z0_res = svqcvtn_u16 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_s64_x4 (z21),
+ z0_res = svqcvtn_u16 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svint64x4_t, svuint16_t,
+ z25 = svqcvtn_u16_s64_x4 (z26),
+ z25 = svqcvtn_u16 (z26))
+
+/*
+** qcvtn_z25_z0:
+** sqcvtun z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svint64x4_t, svuint16_t,
+ z25 = svqcvtn_u16_s64_x4 (z0),
+ z25 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtun z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svint64x4_t, svuint16_t,
+ z22_res = svqcvtn_u16_s64_x4 (z16),
+ z22_res = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** uqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** uqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** uqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svuint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_u32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** uqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svuint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_u32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** uqcvtn z0\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svuint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_u64_x4 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z4:
+** uqcvtn z0\.h, {z4\.d - z7\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svuint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_u64_x4 (z4),
+ z0_res = svqcvtn_u16 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svuint64x4_t, svuint16_t,
+ z0_res = svqcvtn_u16_u64_x4 (z21),
+ z0_res = svqcvtn_u16 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z25\.h, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svuint64x4_t, svuint16_t,
+ z25 = svqcvtn_u16_u64_x4 (z26),
+ z25 = svqcvtn_u16 (z26))
+
+/*
+** qcvtn_z25_z0:
+** uqcvtn z25\.h, {z0\.d - z3\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svuint64x4_t, svuint16_t,
+ z25 = svqcvtn_u16_u64_x4 (z0),
+ z25 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** uqcvtn z22\.h, {z16\.d - z19\.d}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svuint64x4_t, svuint16_t,
+ z22_res = svqcvtn_u16_u64_x4 (z16),
+ z22_res = svqcvtn_u16 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** sqcvtun z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_s32_x4 (z0),
+ z0_res = svqcvtn_u8 (z0))
+
+/*
+** qcvtn_z0_z4:
+** sqcvtun z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_s32_x4 (z4),
+ z0_res = svqcvtn_u8 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_s32_x4 (z21),
+ z0_res = svqcvtn_u8 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svint32x4_t, svuint8_t,
+ z25 = svqcvtn_u8_s32_x4 (z26),
+ z25 = svqcvtn_u8 (z26))
+
+/*
+** qcvtn_z25_z0:
+** sqcvtun z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svint32x4_t, svuint8_t,
+ z25 = svqcvtn_u8_s32_x4 (z0),
+ z25 = svqcvtn_u8 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtun z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svint32x4_t, svuint8_t,
+ z22_res = svqcvtn_u8_s32_x4 (z16),
+ z22_res = svqcvtn_u8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qcvtn_z0_z0:
+** uqcvtn z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z0, svuint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_u32_x4 (z0),
+ z0_res = svqcvtn_u8 (z0))
+
+/*
+** qcvtn_z0_z4:
+** uqcvtn z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z4, svuint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_u32_x4 (z4),
+ z0_res = svqcvtn_u8 (z4))
+
+/*
+** qcvtn_z0_z21:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z0_z21, svuint32x4_t, svuint8_t,
+ z0_res = svqcvtn_u8_u32_x4 (z21),
+ z0_res = svqcvtn_u8 (z21))
+
+/*
+** qcvtn_z25_z26:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z25\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z26, svuint32x4_t, svuint8_t,
+ z25 = svqcvtn_u8_u32_x4 (z26),
+ z25 = svqcvtn_u8 (z26))
+
+/*
+** qcvtn_z25_z0:
+** uqcvtn z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z25_z0, svuint32x4_t, svuint8_t,
+ z25 = svqcvtn_u8_u32_x4 (z0),
+ z25 = svqcvtn_u8 (z0))
+
+/*
+** qcvtn_z22_z16:
+** uqcvtn z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (qcvtn_z22_z16, svuint32x4_t, svuint8_t,
+ z22_res = svqcvtn_u8_u32_x4 (z16),
+ z22_res = svqcvtn_u8 (z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint16x2_t, z0,
+ svqdmulh_s16_x2 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint16x2_t, z0,
+ svqdmulh_s16_x2 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.h - z29\.h}
+** |
+** sqdmulh [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint16x2_t, z0,
+ svqdmulh_s16_x2 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** sqdmulh {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint16x2_t, z18,
+ svqdmulh_s16_x2 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z18, svint16x2_t, z23,
+ svqdmulh_s16_x2 (z23, z18),
+ svqdmulh (z23, z18))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint16x2_t, z28,
+ svqdmulh_s16_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint16x2_t, z0,
+ svqdmulh_s16_x2 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** sqdmulh {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint16x2_t, z4,
+ svqdmulh_s16_x2 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint16x2_t, svint16_t, z24,
+ svqdmulh_single_s16_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** sqdmulh {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint16x2_t, svint16_t, z24,
+ svqdmulh_single_s16_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint16x2_t, svint16_t, z24,
+ svqdmulh_single_s16_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint16x2_t, svint16_t, z1,
+ svqdmulh_single_s16_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint16x2_t, svint16_t, z1,
+ svqdmulh_single_s16_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** sqdmulh {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint16x2_t, svint16_t, z18,
+ svqdmulh_single_s16_x2 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint16x2_t, svint16_t,
+ z0_res = svqdmulh_single_s16_x2 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint16x2_t, svint16_t,
+ z0 = svqdmulh_single_s16_x2 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint16x2_t, svint16_t, z24,
+ svqdmulh_single_s16_x2 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint16x4_t, z0,
+ svqdmulh_s16_x4 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint16x4_t, z0,
+ svqdmulh_s16_x4 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.h - z31\.h}
+** |
+** sqdmulh [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint16x4_t, z0,
+ svqdmulh_s16_x4 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint16x4_t, z18,
+ svqdmulh_s16_x4 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z28, svint16x4_t, z23,
+ svqdmulh_s16_x4 (z23, z28),
+ svqdmulh (z23, z28))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint16x4_t, z28,
+ svqdmulh_s16_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint16x4_t, z0,
+ svqdmulh_s16_x4 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** sqdmulh {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint16x4_t, z4,
+ svqdmulh_s16_x4 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint16x4_t, svint16_t, z24,
+ svqdmulh_single_s16_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** sqdmulh {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint16x4_t, svint16_t, z24,
+ svqdmulh_single_s16_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint16x4_t, svint16_t, z24,
+ svqdmulh_single_s16_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint16x4_t, svint16_t, z1,
+ svqdmulh_single_s16_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint16x4_t, svint16_t, z1,
+ svqdmulh_single_s16_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint16x4_t, svint16_t, z18,
+ svqdmulh_single_s16_x4 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint16x4_t, svint16_t,
+ z0_res = svqdmulh_single_s16_x4 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint16x4_t, svint16_t,
+ z0 = svqdmulh_single_s16_x4 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint16x4_t, svint16_t, z24,
+ svqdmulh_single_s16_x4 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint32x2_t, z0,
+ svqdmulh_s32_x2 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint32x2_t, z0,
+ svqdmulh_s32_x2 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.s - z29\.s}
+** |
+** sqdmulh [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint32x2_t, z0,
+ svqdmulh_s32_x2 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** sqdmulh {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint32x2_t, z18,
+ svqdmulh_s32_x2 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z18, svint32x2_t, z23,
+ svqdmulh_s32_x2 (z23, z18),
+ svqdmulh (z23, z18))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint32x2_t, z28,
+ svqdmulh_s32_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint32x2_t, z0,
+ svqdmulh_s32_x2 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** sqdmulh {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint32x2_t, z4,
+ svqdmulh_s32_x2 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint32x2_t, svint32_t, z24,
+ svqdmulh_single_s32_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** sqdmulh {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint32x2_t, svint32_t, z24,
+ svqdmulh_single_s32_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint32x2_t, svint32_t, z24,
+ svqdmulh_single_s32_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint32x2_t, svint32_t, z1,
+ svqdmulh_single_s32_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint32x2_t, svint32_t, z1,
+ svqdmulh_single_s32_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** sqdmulh {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint32x2_t, svint32_t, z18,
+ svqdmulh_single_s32_x2 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint32x2_t, svint32_t,
+ z0_res = svqdmulh_single_s32_x2 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint32x2_t, svint32_t,
+ z0 = svqdmulh_single_s32_x2 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint32x2_t, svint32_t, z24,
+ svqdmulh_single_s32_x2 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint32x4_t, z0,
+ svqdmulh_s32_x4 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint32x4_t, z0,
+ svqdmulh_s32_x4 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.s - z31\.s}
+** |
+** sqdmulh [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint32x4_t, z0,
+ svqdmulh_s32_x4 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint32x4_t, z18,
+ svqdmulh_s32_x4 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z28, svint32x4_t, z23,
+ svqdmulh_s32_x4 (z23, z28),
+ svqdmulh (z23, z28))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint32x4_t, z28,
+ svqdmulh_s32_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint32x4_t, z0,
+ svqdmulh_s32_x4 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** sqdmulh {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint32x4_t, z4,
+ svqdmulh_s32_x4 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint32x4_t, svint32_t, z24,
+ svqdmulh_single_s32_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** sqdmulh {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint32x4_t, svint32_t, z24,
+ svqdmulh_single_s32_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint32x4_t, svint32_t, z24,
+ svqdmulh_single_s32_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint32x4_t, svint32_t, z1,
+ svqdmulh_single_s32_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint32x4_t, svint32_t, z1,
+ svqdmulh_single_s32_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint32x4_t, svint32_t, z18,
+ svqdmulh_single_s32_x4 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint32x4_t, svint32_t,
+ z0_res = svqdmulh_single_s32_x4 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint32x4_t, svint32_t,
+ z0 = svqdmulh_single_s32_x4 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint32x4_t, svint32_t, z24,
+ svqdmulh_single_s32_x4 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint64x2_t, z0,
+ svqdmulh_s64_x2 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint64x2_t, z0,
+ svqdmulh_s64_x2 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.d - z29\.d}
+** |
+** sqdmulh [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint64x2_t, z0,
+ svqdmulh_s64_x2 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** sqdmulh {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint64x2_t, z18,
+ svqdmulh_s64_x2 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z18, svint64x2_t, z23,
+ svqdmulh_s64_x2 (z23, z18),
+ svqdmulh (z23, z18))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint64x2_t, z28,
+ svqdmulh_s64_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint64x2_t, z0,
+ svqdmulh_s64_x2 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** sqdmulh {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint64x2_t, z4,
+ svqdmulh_s64_x2 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint64x2_t, svint64_t, z24,
+ svqdmulh_single_s64_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** sqdmulh {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint64x2_t, svint64_t, z24,
+ svqdmulh_single_s64_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint64x2_t, svint64_t, z24,
+ svqdmulh_single_s64_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint64x2_t, svint64_t, z1,
+ svqdmulh_single_s64_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint64x2_t, svint64_t, z1,
+ svqdmulh_single_s64_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** sqdmulh {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint64x2_t, svint64_t, z18,
+ svqdmulh_single_s64_x2 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint64x2_t, svint64_t,
+ z0_res = svqdmulh_single_s64_x2 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint64x2_t, svint64_t,
+ z0 = svqdmulh_single_s64_x2 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint64x2_t, svint64_t, z24,
+ svqdmulh_single_s64_x2 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint64x4_t, z0,
+ svqdmulh_s64_x4 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint64x4_t, z0,
+ svqdmulh_s64_x4 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.d - z31\.d}
+** |
+** sqdmulh [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint64x4_t, z0,
+ svqdmulh_s64_x4 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint64x4_t, z18,
+ svqdmulh_s64_x4 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z28, svint64x4_t, z23,
+ svqdmulh_s64_x4 (z23, z28),
+ svqdmulh (z23, z28))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint64x4_t, z28,
+ svqdmulh_s64_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint64x4_t, z0,
+ svqdmulh_s64_x4 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** sqdmulh {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint64x4_t, z4,
+ svqdmulh_s64_x4 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint64x4_t, svint64_t, z24,
+ svqdmulh_single_s64_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** sqdmulh {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint64x4_t, svint64_t, z24,
+ svqdmulh_single_s64_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint64x4_t, svint64_t, z24,
+ svqdmulh_single_s64_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint64x4_t, svint64_t, z1,
+ svqdmulh_single_s64_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint64x4_t, svint64_t, z1,
+ svqdmulh_single_s64_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint64x4_t, svint64_t, z18,
+ svqdmulh_single_s64_x4 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint64x4_t, svint64_t,
+ z0_res = svqdmulh_single_s64_x4 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint64x4_t, svint64_t,
+ z0 = svqdmulh_single_s64_x4 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint64x4_t, svint64_t, z24,
+ svqdmulh_single_s64_x4 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint8x2_t, z0,
+ svqdmulh_s8_x2 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint8x2_t, z0,
+ svqdmulh_s8_x2 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.b - z29\.b}
+** |
+** sqdmulh [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint8x2_t, z0,
+ svqdmulh_s8_x2 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** sqdmulh {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint8x2_t, z18,
+ svqdmulh_s8_x2 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z18, svint8x2_t, z23,
+ svqdmulh_s8_x2 (z23, z18),
+ svqdmulh (z23, z18))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint8x2_t, z28,
+ svqdmulh_s8_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint8x2_t, z0,
+ svqdmulh_s8_x2 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** sqdmulh {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint8x2_t, z4,
+ svqdmulh_s8_x2 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint8x2_t, svint8_t, z24,
+ svqdmulh_single_s8_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** sqdmulh {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint8x2_t, svint8_t, z24,
+ svqdmulh_single_s8_x2 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint8x2_t, svint8_t, z24,
+ svqdmulh_single_s8_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint8x2_t, svint8_t, z1,
+ svqdmulh_single_s8_x2 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint8x2_t, svint8_t, z1,
+ svqdmulh_single_s8_x2 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** sqdmulh {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint8x2_t, svint8_t, z18,
+ svqdmulh_single_s8_x2 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint8x2_t, svint8_t,
+ z0_res = svqdmulh_single_s8_x2 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint8x2_t, svint8_t,
+ z0 = svqdmulh_single_s8_x2 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint8x2_t, svint8_t, z24,
+ svqdmulh_single_s8_x2 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qdmulh_z0_z0_z4:
+** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z4, svint8x4_t, z0,
+ svqdmulh_s8_x4 (z0, z4),
+ svqdmulh (z0, z4))
+
+/*
+** qdmulh_z0_z4_z0:
+** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z0, svint8x4_t, z0,
+ svqdmulh_s8_x4 (z4, z0),
+ svqdmulh (z4, z0))
+
+/*
+** qdmulh_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.b - z31\.b}
+** |
+** sqdmulh [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z4_z28, svint8x4_t, z0,
+ svqdmulh_s8_x4 (z4, z28),
+ svqdmulh (z4, z28))
+
+/*
+** qdmulh_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z18_z18_z4, svint8x4_t, z18,
+ svqdmulh_s8_x4 (z18, z4),
+ svqdmulh (z18, z4))
+
+/*
+** qdmulh_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (qdmulh_z23_z23_z28, svint8x4_t, z23,
+ svqdmulh_s8_x4 (z23, z28),
+ svqdmulh (z23, z28))
+
+/*
+** qdmulh_z28_z28_z0:
+** sqdmulh {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (qdmulh_z28_z28_z0, svint8x4_t, z28,
+ svqdmulh_s8_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z0_z0_z18, svint8x4_t, z0,
+ svqdmulh_s8_x4 (z0, z18),
+ svqdmulh (z0, z18))
+
+/*
+** qdmulh_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** sqdmulh {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (qdmulh_z4_z4_z23, svint8x4_t, z4,
+ svqdmulh_s8_x4 (z4, z23),
+ svqdmulh (z4, z23))
+
+/*
+** qdmulh_single_z24_z24_z0:
+** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint8x4_t, svint8_t, z24,
+ svqdmulh_single_s8_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** sqdmulh {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint8x4_t, svint8_t, z24,
+ svqdmulh_single_s8_x4 (z28, z0),
+ svqdmulh (z28, z0))
+
+/*
+** qdmulh_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint8x4_t, svint8_t, z24,
+ svqdmulh_single_s8_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z1_z24_z0:
+** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint8x4_t, svint8_t, z1,
+ svqdmulh_single_s8_x4 (z24, z0),
+ svqdmulh (z24, z0))
+
+/*
+** qdmulh_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint8x4_t, svint8_t, z1,
+ svqdmulh_single_s8_x4 (z1, z0),
+ svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqdmulh [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint8x4_t, svint8_t, z18,
+ svqdmulh_single_s8_x4 (z18, z0),
+ svqdmulh (z18, z0))
+
+/*
+** qdmulh_single_awkward:
+** ...
+** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint8x4_t, svint8_t,
+ z0_res = svqdmulh_single_s8_x4 (z1, z0),
+ z0_res = svqdmulh (z1, z0))
+
+/*
+** qdmulh_single_z0_z0_z15:
+** ...
+** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint8x4_t, svint8_t,
+ z0 = svqdmulh_single_s8_x4 (z0, z15),
+ z0 = svqdmulh (z0, z15))
+
+/*
+** qdmulh_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint8x4_t, svint8_t, z24,
+ svqdmulh_single_s8_x4 (z24, z16),
+ svqdmulh (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** sqrshr z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z0_1, svint32x2_t, svint16_t,
+ z0_res = svqrshr_n_s16_s32_x2 (z0, 1),
+ z0_res = svqrshr_s16 (z0, 1))
+
+/*
+** qrshr_z0_z6_16:
+** sqrshr z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z6_16, svint32x2_t, svint16_t,
+ z0_res = svqrshr_n_s16_s32_x2 (z6, 16),
+ z0_res = svqrshr_s16 (z6, 16))
+
+/*
+** qrshr_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshr z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z29_13, svint32x2_t, svint16_t,
+ z0_res = svqrshr_n_s16_s32_x2 (z29, 13),
+ z0_res = svqrshr_s16 (z29, 13))
+
+/*
+** qrshr_z5_z0_11:
+** sqrshr z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshr_z5_z0_11, svint32x2_t, svint16_t,
+ z5 = svqrshr_n_s16_s32_x2 (z0, 11),
+ z5 = svqrshr_s16 (z0, 11))
+
+/*
+** qrshr_z22_z16_15:
+** sqrshr z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshr_z22_z16_15, svint32x2_t, svint16_t,
+ z22 = svqrshr_n_s16_s32_x2 (z16, 15),
+ z22 = svqrshr_s16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** sqrshr z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z0_1, svint64x4_t, svint16_t,
+ z0_res = svqrshr_n_s16_s64_x4 (z0, 1),
+ z0_res = svqrshr_s16 (z0, 1))
+
+/*
+** qrshr_z0_z4_64:
+** sqrshr z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z4_64, svint64x4_t, svint16_t,
+ z0_res = svqrshr_n_s16_s64_x4 (z4, 64),
+ z0_res = svqrshr_s16 (z4, 64))
+
+/*
+** qrshr_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshr z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z21_33, svint64x4_t, svint16_t,
+ z0_res = svqrshr_n_s16_s64_x4 (z21, 33),
+ z0_res = svqrshr_s16 (z21, 33))
+
+/*
+** qrshr_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshr z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z26_12, svint64x4_t, svint16_t,
+ z25 = svqrshr_n_s16_s64_x4 (z26, 12),
+ z25 = svqrshr_s16 (z26, 12))
+
+/*
+** qrshr_z25_z0_32:
+** sqrshr z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z0_32, svint64x4_t, svint16_t,
+ z25 = svqrshr_n_s16_s64_x4 (z0, 32),
+ z25 = svqrshr_s16 (z0, 32))
+
+/*
+** qrshr_z22_z16_63:
+** sqrshr z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshr_z22_z16_63, svint64x4_t, svint16_t,
+ z22_res = svqrshr_n_s16_s64_x4 (z16, 63),
+ z22_res = svqrshr_s16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** sqrshr z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z0_1, svint32x4_t, svint8_t,
+ z0_res = svqrshr_n_s8_s32_x4 (z0, 1),
+ z0_res = svqrshr_s8 (z0, 1))
+
+/*
+** qrshr_z0_z4_32:
+** sqrshr z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z4_32, svint32x4_t, svint8_t,
+ z0_res = svqrshr_n_s8_s32_x4 (z4, 32),
+ z0_res = svqrshr_s8 (z4, 32))
+
+/*
+** qrshr_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshr z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z21_2, svint32x4_t, svint8_t,
+ z0_res = svqrshr_n_s8_s32_x4 (z21, 2),
+ z0_res = svqrshr_s8 (z21, 2))
+
+/*
+** qrshr_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshr z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z26_12, svint32x4_t, svint8_t,
+ z25 = svqrshr_n_s8_s32_x4 (z26, 12),
+ z25 = svqrshr_s8 (z26, 12))
+
+/*
+** qrshr_z25_z0_16:
+** sqrshr z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z0_16, svint32x4_t, svint8_t,
+ z25 = svqrshr_n_s8_s32_x4 (z0, 16),
+ z25 = svqrshr_s8 (z0, 16))
+
+/*
+** qrshr_z22_z16_31:
+** sqrshr z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshr_z22_z16_31, svint32x4_t, svint8_t,
+ z22_res = svqrshr_n_s8_s32_x4 (z16, 31),
+ z22_res = svqrshr_s8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** uqrshr z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z0_1, svuint32x2_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u32_x2 (z0, 1),
+ z0_res = svqrshr_u16 (z0, 1))
+
+/*
+** qrshr_z0_z6_16:
+** uqrshr z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z6_16, svuint32x2_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u32_x2 (z6, 16),
+ z0_res = svqrshr_u16 (z6, 16))
+
+/*
+** qrshr_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** uqrshr z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshr_z0_z29_13, svuint32x2_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u32_x2 (z29, 13),
+ z0_res = svqrshr_u16 (z29, 13))
+
+/*
+** qrshr_z5_z0_11:
+** uqrshr z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshr_z5_z0_11, svuint32x2_t, svuint16_t,
+ z5 = svqrshr_n_u16_u32_x2 (z0, 11),
+ z5 = svqrshr_u16 (z0, 11))
+
+/*
+** qrshr_z22_z16_15:
+** uqrshr z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshr_z22_z16_15, svuint32x2_t, svuint16_t,
+ z22 = svqrshr_n_u16_u32_x2 (z16, 15),
+ z22 = svqrshr_u16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** uqrshr z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z0_1, svuint64x4_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u64_x4 (z0, 1),
+ z0_res = svqrshr_u16 (z0, 1))
+
+/*
+** qrshr_z0_z4_64:
+** uqrshr z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z4_64, svuint64x4_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u64_x4 (z4, 64),
+ z0_res = svqrshr_u16 (z4, 64))
+
+/*
+** qrshr_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshr z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z21_33, svuint64x4_t, svuint16_t,
+ z0_res = svqrshr_n_u16_u64_x4 (z21, 33),
+ z0_res = svqrshr_u16 (z21, 33))
+
+/*
+** qrshr_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshr z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z26_12, svuint64x4_t, svuint16_t,
+ z25 = svqrshr_n_u16_u64_x4 (z26, 12),
+ z25 = svqrshr_u16 (z26, 12))
+
+/*
+** qrshr_z25_z0_32:
+** uqrshr z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z0_32, svuint64x4_t, svuint16_t,
+ z25 = svqrshr_n_u16_u64_x4 (z0, 32),
+ z25 = svqrshr_u16 (z0, 32))
+
+/*
+** qrshr_z22_z16_63:
+** uqrshr z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshr_z22_z16_63, svuint64x4_t, svuint16_t,
+ z22_res = svqrshr_n_u16_u64_x4 (z16, 63),
+ z22_res = svqrshr_u16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshr_z0_z0_1:
+** uqrshr z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z0_1, svuint32x4_t, svuint8_t,
+ z0_res = svqrshr_n_u8_u32_x4 (z0, 1),
+ z0_res = svqrshr_u8 (z0, 1))
+
+/*
+** qrshr_z0_z4_32:
+** uqrshr z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z4_32, svuint32x4_t, svuint8_t,
+ z0_res = svqrshr_n_u8_u32_x4 (z4, 32),
+ z0_res = svqrshr_u8 (z4, 32))
+
+/*
+** qrshr_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshr z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshr_z0_z21_2, svuint32x4_t, svuint8_t,
+ z0_res = svqrshr_n_u8_u32_x4 (z21, 2),
+ z0_res = svqrshr_u8 (z21, 2))
+
+/*
+** qrshr_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshr z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z26_12, svuint32x4_t, svuint8_t,
+ z25 = svqrshr_n_u8_u32_x4 (z26, 12),
+ z25 = svqrshr_u8 (z26, 12))
+
+/*
+** qrshr_z25_z0_16:
+** uqrshr z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshr_z25_z0_16, svuint32x4_t, svuint8_t,
+ z25 = svqrshr_n_u8_u32_x4 (z0, 16),
+ z25 = svqrshr_u8 (z0, 16))
+
+/*
+** qrshr_z22_z16_31:
+** uqrshr z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshr_z22_z16_31, svuint32x4_t, svuint8_t,
+ z22_res = svqrshr_n_u8_u32_x4 (z16, 31),
+ z22_res = svqrshr_u8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** sqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z0, 1),
+ z0_res = svqrshrn_s16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** sqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z6, 16),
+ z0_res = svqrshrn_s16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z29, 13),
+ z0_res = svqrshrn_s16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** sqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svint32x2_t, svint16_t,
+ z5 = svqrshrn_n_s16_s32_x2 (z0, 11),
+ z5 = svqrshrn_s16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** sqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svint32x2_t, svint16_t,
+ z22 = svqrshrn_n_s16_s32_x2 (z16, 15),
+ z22 = svqrshrn_s16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** sqrshrn z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z0_1, svint64x4_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s64_x4 (z0, 1),
+ z0_res = svqrshrn_s16 (z0, 1))
+
+/*
+** qrshrn_z0_z4_64:
+** sqrshrn z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z4_64, svint64x4_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s64_x4 (z4, 64),
+ z0_res = svqrshrn_s16 (z4, 64))
+
+/*
+** qrshrn_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z21_33, svint64x4_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s64_x4 (z21, 33),
+ z0_res = svqrshrn_s16 (z21, 33))
+
+/*
+** qrshrn_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z26_12, svint64x4_t, svint16_t,
+ z25 = svqrshrn_n_s16_s64_x4 (z26, 12),
+ z25 = svqrshrn_s16 (z26, 12))
+
+/*
+** qrshrn_z25_z0_32:
+** sqrshrn z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z0_32, svint64x4_t, svint16_t,
+ z25 = svqrshrn_n_s16_s64_x4 (z0, 32),
+ z25 = svqrshrn_s16 (z0, 32))
+
+/*
+** qrshrn_z22_z16_63:
+** sqrshrn z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z22_z16_63, svint64x4_t, svint16_t,
+ z22_res = svqrshrn_n_s16_s64_x4 (z16, 63),
+ z22_res = svqrshrn_s16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** sqrshrn z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z0_1, svint32x4_t, svint8_t,
+ z0_res = svqrshrn_n_s8_s32_x4 (z0, 1),
+ z0_res = svqrshrn_s8 (z0, 1))
+
+/*
+** qrshrn_z0_z4_32:
+** sqrshrn z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z4_32, svint32x4_t, svint8_t,
+ z0_res = svqrshrn_n_s8_s32_x4 (z4, 32),
+ z0_res = svqrshrn_s8 (z4, 32))
+
+/*
+** qrshrn_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z21_2, svint32x4_t, svint8_t,
+ z0_res = svqrshrn_n_s8_s32_x4 (z21, 2),
+ z0_res = svqrshrn_s8 (z21, 2))
+
+/*
+** qrshrn_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z26_12, svint32x4_t, svint8_t,
+ z25 = svqrshrn_n_s8_s32_x4 (z26, 12),
+ z25 = svqrshrn_s8 (z26, 12))
+
+/*
+** qrshrn_z25_z0_16:
+** sqrshrn z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z0_16, svint32x4_t, svint8_t,
+ z25 = svqrshrn_n_s8_s32_x4 (z0, 16),
+ z25 = svqrshrn_s8 (z0, 16))
+
+/*
+** qrshrn_z22_z16_31:
+** sqrshrn z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z22_z16_31, svint32x4_t, svint8_t,
+ z22_res = svqrshrn_n_s8_s32_x4 (z16, 31),
+ z22_res = svqrshrn_s8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** uqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z0, 1),
+ z0_res = svqrshrn_u16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** uqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z6, 16),
+ z0_res = svqrshrn_u16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z29, 13),
+ z0_res = svqrshrn_u16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** uqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svuint32x2_t, svuint16_t,
+ z5 = svqrshrn_n_u16_u32_x2 (z0, 11),
+ z5 = svqrshrn_u16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** uqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svuint32x2_t, svuint16_t,
+ z22 = svqrshrn_n_u16_u32_x2 (z16, 15),
+ z22 = svqrshrn_u16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** uqrshrn z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z0_1, svuint64x4_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u64_x4 (z0, 1),
+ z0_res = svqrshrn_u16 (z0, 1))
+
+/*
+** qrshrn_z0_z4_64:
+** uqrshrn z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z4_64, svuint64x4_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u64_x4 (z4, 64),
+ z0_res = svqrshrn_u16 (z4, 64))
+
+/*
+** qrshrn_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z21_33, svuint64x4_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u64_x4 (z21, 33),
+ z0_res = svqrshrn_u16 (z21, 33))
+
+/*
+** qrshrn_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z26_12, svuint64x4_t, svuint16_t,
+ z25 = svqrshrn_n_u16_u64_x4 (z26, 12),
+ z25 = svqrshrn_u16 (z26, 12))
+
+/*
+** qrshrn_z25_z0_32:
+** uqrshrn z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z0_32, svuint64x4_t, svuint16_t,
+ z25 = svqrshrn_n_u16_u64_x4 (z0, 32),
+ z25 = svqrshrn_u16 (z0, 32))
+
+/*
+** qrshrn_z22_z16_63:
+** uqrshrn z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z22_z16_63, svuint64x4_t, svuint16_t,
+ z22_res = svqrshrn_n_u16_u64_x4 (z16, 63),
+ z22_res = svqrshrn_u16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrn_z0_z0_1:
+** uqrshrn z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z0_1, svuint32x4_t, svuint8_t,
+ z0_res = svqrshrn_n_u8_u32_x4 (z0, 1),
+ z0_res = svqrshrn_u8 (z0, 1))
+
+/*
+** qrshrn_z0_z4_32:
+** uqrshrn z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z4_32, svuint32x4_t, svuint8_t,
+ z0_res = svqrshrn_n_u8_u32_x4 (z4, 32),
+ z0_res = svqrshrn_u8 (z4, 32))
+
+/*
+** qrshrn_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z0_z21_2, svuint32x4_t, svuint8_t,
+ z0_res = svqrshrn_n_u8_u32_x4 (z21, 2),
+ z0_res = svqrshrn_u8 (z21, 2))
+
+/*
+** qrshrn_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z26_12, svuint32x4_t, svuint8_t,
+ z25 = svqrshrn_n_u8_u32_x4 (z26, 12),
+ z25 = svqrshrn_u8 (z26, 12))
+
+/*
+** qrshrn_z25_z0_16:
+** uqrshrn z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z25_z0_16, svuint32x4_t, svuint8_t,
+ z25 = svqrshrn_n_u8_u32_x4 (z0, 16),
+ z25 = svqrshrn_u8 (z0, 16))
+
+/*
+** qrshrn_z22_z16_31:
+** uqrshrn z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshrn_z22_z16_31, svuint32x4_t, svuint8_t,
+ z22_res = svqrshrn_n_u8_u32_x4 (z16, 31),
+ z22_res = svqrshrn_u8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshru_z0_z0_1:
+** sqrshru z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshru_z0_z0_1, svint32x2_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s32_x2 (z0, 1),
+ z0_res = svqrshru_u16 (z0, 1))
+
+/*
+** qrshru_z0_z6_16:
+** sqrshru z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshru_z0_z6_16, svint32x2_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s32_x2 (z6, 16),
+ z0_res = svqrshru_u16 (z6, 16))
+
+/*
+** qrshru_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshru z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshru_z0_z29_13, svint32x2_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s32_x2 (z29, 13),
+ z0_res = svqrshru_u16 (z29, 13))
+
+/*
+** qrshru_z5_z0_11:
+** sqrshru z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshru_z5_z0_11, svint32x2_t, svuint16_t,
+ z5 = svqrshru_n_u16_s32_x2 (z0, 11),
+ z5 = svqrshru_u16 (z0, 11))
+
+/*
+** qrshru_z22_z16_15:
+** sqrshru z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshru_z22_z16_15, svint32x2_t, svuint16_t,
+ z22 = svqrshru_n_u16_s32_x2 (z16, 15),
+ z22 = svqrshru_u16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshru_z0_z0_1:
+** sqrshru z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z0_1, svint64x4_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s64_x4 (z0, 1),
+ z0_res = svqrshru_u16 (z0, 1))
+
+/*
+** qrshru_z0_z4_64:
+** sqrshru z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z4_64, svint64x4_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s64_x4 (z4, 64),
+ z0_res = svqrshru_u16 (z4, 64))
+
+/*
+** qrshru_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshru z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z21_33, svint64x4_t, svuint16_t,
+ z0_res = svqrshru_n_u16_s64_x4 (z21, 33),
+ z0_res = svqrshru_u16 (z21, 33))
+
+/*
+** qrshru_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshru z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshru_z25_z26_12, svint64x4_t, svuint16_t,
+ z25 = svqrshru_n_u16_s64_x4 (z26, 12),
+ z25 = svqrshru_u16 (z26, 12))
+
+/*
+** qrshru_z25_z0_32:
+** sqrshru z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshru_z25_z0_32, svint64x4_t, svuint16_t,
+ z25 = svqrshru_n_u16_s64_x4 (z0, 32),
+ z25 = svqrshru_u16 (z0, 32))
+
+/*
+** qrshru_z22_z16_63:
+** sqrshru z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshru_z22_z16_63, svint64x4_t, svuint16_t,
+ z22_res = svqrshru_n_u16_s64_x4 (z16, 63),
+ z22_res = svqrshru_u16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshru_z0_z0_1:
+** sqrshru z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z0_1, svint32x4_t, svuint8_t,
+ z0_res = svqrshru_n_u8_s32_x4 (z0, 1),
+ z0_res = svqrshru_u8 (z0, 1))
+
+/*
+** qrshru_z0_z4_32:
+** sqrshru z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z4_32, svint32x4_t, svuint8_t,
+ z0_res = svqrshru_n_u8_s32_x4 (z4, 32),
+ z0_res = svqrshru_u8 (z4, 32))
+
+/*
+** qrshru_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshru z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshru_z0_z21_2, svint32x4_t, svuint8_t,
+ z0_res = svqrshru_n_u8_s32_x4 (z21, 2),
+ z0_res = svqrshru_u8 (z21, 2))
+
+/*
+** qrshru_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshru z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshru_z25_z26_12, svint32x4_t, svuint8_t,
+ z25 = svqrshru_n_u8_s32_x4 (z26, 12),
+ z25 = svqrshru_u8 (z26, 12))
+
+/*
+** qrshru_z25_z0_16:
+** sqrshru z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshru_z25_z0_16, svint32x4_t, svuint8_t,
+ z25 = svqrshru_n_u8_s32_x4 (z0, 16),
+ z25 = svqrshru_u8 (z0, 16))
+
+/*
+** qrshru_z22_z16_31:
+** sqrshru z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshru_z22_z16_31, svint32x4_t, svuint8_t,
+ z22_res = svqrshru_n_u8_s32_x4 (z16, 31),
+ z22_res = svqrshru_u8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+** sqrshrun z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z0_1, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z0, 1),
+ z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z6_16:
+** sqrshrun z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z6_16, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z6, 16),
+ z0_res = svqrshrun_u16 (z6, 16))
+
+/*
+** qrshrun_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z29_13, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z29, 13),
+ z0_res = svqrshrun_u16 (z29, 13))
+
+/*
+** qrshrun_z5_z0_11:
+** sqrshrun z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z5_z0_11, svint32x2_t, svuint16_t,
+ z5 = svqrshrun_n_u16_s32_x2 (z0, 11),
+ z5 = svqrshrun_u16 (z0, 11))
+
+/*
+** qrshrun_z22_z16_15:
+** sqrshrun z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z22_z16_15, svint32x2_t, svuint16_t,
+ z22 = svqrshrun_n_u16_s32_x2 (z16, 15),
+ z22 = svqrshrun_u16 (z16, 15))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+** sqrshrun z0\.h, {z0\.d - z3\.d}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z0_1, svint64x4_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s64_x4 (z0, 1),
+ z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z4_64:
+** sqrshrun z0\.h, {z4\.d - z7\.d}, #64
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z4_64, svint64x4_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s64_x4 (z4, 64),
+ z0_res = svqrshrun_u16 (z4, 64))
+
+/*
+** qrshrun_z0_z21_33:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z0\.h, [^\n]+, #33
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z21_33, svint64x4_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s64_x4 (z21, 33),
+ z0_res = svqrshrun_u16 (z21, 33))
+
+/*
+** qrshrun_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z25\.h, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z26_12, svint64x4_t, svuint16_t,
+ z25 = svqrshrun_n_u16_s64_x4 (z26, 12),
+ z25 = svqrshrun_u16 (z26, 12))
+
+/*
+** qrshrun_z25_z0_32:
+** sqrshrun z25\.h, {z0\.d - z3\.d}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z0_32, svint64x4_t, svuint16_t,
+ z25 = svqrshrun_n_u16_s64_x4 (z0, 32),
+ z25 = svqrshrun_u16 (z0, 32))
+
+/*
+** qrshrun_z22_z16_63:
+** sqrshrun z22\.h, {z16\.d - z19\.d}, #63
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z22_z16_63, svint64x4_t, svuint16_t,
+ z22_res = svqrshrun_n_u16_s64_x4 (z16, 63),
+ z22_res = svqrshrun_u16 (z16, 63))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+** sqrshrun z0\.b, {z0\.s - z3\.s}, #1
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z0_1, svint32x4_t, svuint8_t,
+ z0_res = svqrshrun_n_u8_s32_x4 (z0, 1),
+ z0_res = svqrshrun_u8 (z0, 1))
+
+/*
+** qrshrun_z0_z4_32:
+** sqrshrun z0\.b, {z4\.s - z7\.s}, #32
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z4_32, svint32x4_t, svuint8_t,
+ z0_res = svqrshrun_n_u8_s32_x4 (z4, 32),
+ z0_res = svqrshrun_u8 (z4, 32))
+
+/*
+** qrshrun_z0_z21_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z0\.b, [^\n]+, #2
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z21_2, svint32x4_t, svuint8_t,
+ z0_res = svqrshrun_n_u8_s32_x4 (z21, 2),
+ z0_res = svqrshrun_u8 (z21, 2))
+
+/*
+** qrshrun_z25_z26_12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z25\.b, [^\n]+, #12
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z26_12, svint32x4_t, svuint8_t,
+ z25 = svqrshrun_n_u8_s32_x4 (z26, 12),
+ z25 = svqrshrun_u8 (z26, 12))
+
+/*
+** qrshrun_z25_z0_16:
+** sqrshrun z25\.b, {z0\.s - z3\.s}, #16
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z0_16, svint32x4_t, svuint8_t,
+ z25 = svqrshrun_n_u8_s32_x4 (z0, 16),
+ z25 = svqrshrun_u8 (z0, 16))
+
+/*
+** qrshrun_z22_z16_31:
+** sqrshrun z22\.b, {z16\.s - z19\.s}, #31
+** ret
+*/
+TEST_X4_NARROW (qrshrun_z22_z16_31, svint32x4_t, svuint8_t,
+ z22_res = svqrshrun_n_u8_s32_x4 (z16, 31),
+ z22_res = svqrshrun_u8 (z16, 31))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.h - z1\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x2_t,
+ z0 = svread_hor_za16_s16_vg2 (0, 0),
+ z0 = svread_hor_za16_s16_vg2 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.h - z5\.h}, za1h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x2_t,
+ z4 = svread_hor_za16_u16_vg2 (1, 1),
+ z4 = svread_hor_za16_u16_vg2 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.h - z29\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x2_t,
+ z28 = svread_hor_za16_f16_vg2 (0, w11),
+ z28 = svread_hor_za16_f16_vg2 (0, w11))
+
+/*
+** read_za16_bf16_z0_1_w12:
+** mova {z0\.h - z1\.h}, za1h\.h\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z0_1_w12, svbfloat16x2_t,
+ z0 = svread_hor_za16_bf16_vg2 (1, w12),
+ z0 = svread_hor_za16_bf16_vg2 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+** mova {z18\.h - z19\.h}, za0h\.h\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x2_t,
+ z18 = svread_hor_za16_u16_vg2 (0, w15),
+ z18 = svread_hor_za16_u16_vg2 (0, w15))
+
+/*
+** read_za16_s16_z23_1_w12p6:
+** mova {[^\n]+}, za1h\.h\[w12, 6:7\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z23_1_w12p6, svint16x2_t,
+ z23 = svread_hor_za16_s16_vg2 (1, w12 + 6),
+ z23 = svread_hor_za16_s16_vg2 (1, w12 + 6))
+
+/*
+** read_za16_f16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.h - z5\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z4_0_w12p1, svfloat16x2_t,
+ z4 = svread_hor_za16_f16_vg2 (0, w12 + 1),
+ z4 = svread_hor_za16_f16_vg2 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+** mova {z28\.h - z29\.h}, za1h\.h\[w12, 2:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x2_t,
+ z28 = svread_hor_za16_s16_vg2 (1, w12 + 2),
+ z28 = svread_hor_za16_s16_vg2 (1, w12 + 2))
+
+/*
+** read_za16_u16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.h - z1\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z0_0_w15p3, svuint16x2_t,
+ z0 = svread_hor_za16_u16_vg2 (0, w15 + 3),
+ z0 = svread_hor_za16_u16_vg2 (0, w15 + 3))
+
+/*
+** read_za16_bf16_z4_1_w15p4:
+** mova {z4\.h - z5\.h}, za1h\.h\[w15, 4:5\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+ z4 = svread_hor_za16_bf16_vg2 (1, w15 + 4),
+ z4 = svread_hor_za16_bf16_vg2 (1, w15 + 4))
+
+/*
+** read_za16_u16_z28_0_w12p7:
+** add (w[0-9]+), w12, #?7
+** mova {z28\.h - z29\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w12p7, svuint16x2_t,
+ z28 = svread_hor_za16_u16_vg2 (0, w12 + 7),
+ z28 = svread_hor_za16_u16_vg2 (0, w12 + 7))
+
+/*
+** read_za16_s16_z0_1_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova {z0\.h - z1\.h}, za1h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w15p8, svint16x2_t,
+ z0 = svread_hor_za16_s16_vg2 (1, w15 + 8),
+ z0 = svread_hor_za16_s16_vg2 (1, w15 + 8))
+
+/*
+** read_za16_u16_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.h - z5\.h}, za0h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12m1, svuint16x2_t,
+ z4 = svread_hor_za16_u16_vg2 (0, w12 - 1),
+ z4 = svread_hor_za16_u16_vg2 (0, w12 - 1))
+
+/*
+** read_za16_u16_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.h - z19\.h}, za1h\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_1_w16, svuint16x2_t,
+ z18 = svread_hor_za16_u16_vg2 (1, w16),
+ z18 = svread_hor_za16_u16_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x4_t,
+ z0 = svread_hor_za16_s16_vg4 (0, 0),
+ z0 = svread_hor_za16_s16_vg4 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.h - z7\.h}, za1h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x4_t,
+ z4 = svread_hor_za16_u16_vg4 (1, 1),
+ z4 = svread_hor_za16_u16_vg4 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.h - z31\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x4_t,
+ z28 = svread_hor_za16_f16_vg4 (0, w11),
+ z28 = svread_hor_za16_f16_vg4 (0, w11))
+
+/*
+** read_za16_s16_z0_1_w12:
+** mova {z0\.h - z3\.h}, za1h\.h\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w12, svint16x4_t,
+ z0 = svread_hor_za16_s16_vg4 (1, w12),
+ z0 = svread_hor_za16_s16_vg4 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+** mova {[^\n]+}, za0h\.h\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x4_t,
+ z18 = svread_hor_za16_u16_vg4 (0, w15),
+ z18 = svread_hor_za16_u16_vg4 (0, w15))
+
+/*
+** read_za16_bf16_z23_1_w12p4:
+** mova {[^\n]+}, za1h\.h\[w12, 4:7\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+ z23 = svread_hor_za16_bf16_vg4 (1, w12 + 4),
+ z23 = svread_hor_za16_bf16_vg4 (1, w12 + 4))
+
+/*
+** read_za16_u16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.h - z7\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12p1, svuint16x4_t,
+ z4 = svread_hor_za16_u16_vg4 (0, w12 + 1),
+ z4 = svread_hor_za16_u16_vg4 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.h - z31\.h}, za1h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x4_t,
+ z28 = svread_hor_za16_s16_vg4 (1, w12 + 2),
+ z28 = svread_hor_za16_s16_vg4 (1, w12 + 2))
+
+/*
+** read_za16_f16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z0_0_w15p3, svfloat16x4_t,
+ z0 = svread_hor_za16_f16_vg4 (0, w15 + 3),
+ z0 = svread_hor_za16_f16_vg4 (0, w15 + 3))
+
+/*
+** read_za16_u16_z28_1_w12p6:
+** add (w[0-9]+), w12, #?6
+** mova {z28\.h - z31\.h}, za1h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_1_w12p6, svuint16x4_t,
+ z28 = svread_hor_za16_u16_vg4 (1, w12 + 6),
+ z28 = svread_hor_za16_u16_vg4 (1, w12 + 6))
+
+/*
+** read_za16_s16_z0_0_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova {z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_w15p8, svint16x4_t,
+ z0 = svread_hor_za16_s16_vg4 (0, w15 + 8),
+ z0 = svread_hor_za16_s16_vg4 (0, w15 + 8))
+
+/*
+** read_za16_bf16_z4_1_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.h - z7\.h}, za1h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+ z4 = svread_hor_za16_bf16_vg4 (1, w12 - 1),
+ z4 = svread_hor_za16_bf16_vg4 (1, w12 - 1))
+
+/*
+** read_za16_u16_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.h - z31\.h}, za0h\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w16, svuint16x4_t,
+ z28 = svread_hor_za16_u16_vg4 (0, w16),
+ z28 = svread_hor_za16_u16_vg4 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.s - z1\.s}, za0h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x2_t,
+ z0 = svread_hor_za32_s32_vg2 (0, 0),
+ z0 = svread_hor_za32_s32_vg2 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.s - z5\.s}, za1h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x2_t,
+ z4 = svread_hor_za32_u32_vg2 (1, 1),
+ z4 = svread_hor_za32_u32_vg2 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.s - z29\.s}, za2h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x2_t,
+ z28 = svread_hor_za32_f32_vg2 (2, w11),
+ z28 = svread_hor_za32_f32_vg2 (2, w11))
+
+/*
+** read_za32_f32_z0_3_w12:
+** mova {z0\.s - z1\.s}, za3h\.s\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_3_w12, svfloat32x2_t,
+ z0 = svread_hor_za32_f32_vg2 (3, w12),
+ z0 = svread_hor_za32_f32_vg2 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+** mova {z18\.s - z19\.s}, za0h\.s\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x2_t,
+ z18 = svread_hor_za32_u32_vg2 (0, w15),
+ z18 = svread_hor_za32_u32_vg2 (0, w15))
+
+/*
+** read_za32_s32_z23_1_w12p2:
+** mova {[^\n]+}, za1h\.s\[w12, 2:3\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z23_1_w12p2, svint32x2_t,
+ z23 = svread_hor_za32_s32_vg2 (1, w12 + 2),
+ z23 = svread_hor_za32_s32_vg2 (1, w12 + 2))
+
+/*
+** read_za32_f32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.s - z5\.s}, za2h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12p1, svfloat32x2_t,
+ z4 = svread_hor_za32_f32_vg2 (2, w12 + 1),
+ z4 = svread_hor_za32_f32_vg2 (2, w12 + 1))
+
+/*
+** read_za32_u32_z0_3_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.s - z1\.s}, za3h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z0_3_w15p3, svuint32x2_t,
+ z0 = svread_hor_za32_u32_vg2 (3, w15 + 3),
+ z0 = svread_hor_za32_u32_vg2 (3, w15 + 3))
+
+/*
+** read_za32_s32_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova {z0\.s - z1\.s}, za1h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_1_w15p4, svint32x2_t,
+ z0 = svread_hor_za32_s32_vg2 (1, w15 + 4),
+ z0 = svread_hor_za32_s32_vg2 (1, w15 + 4))
+
+/*
+** read_za32_u32_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.s - z5\.s}, za3h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_3_w12m1, svuint32x2_t,
+ z4 = svread_hor_za32_u32_vg2 (3, w12 - 1),
+ z4 = svread_hor_za32_u32_vg2 (3, w12 - 1))
+
+/*
+** read_za32_u32_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.s - z19\.s}, za1h\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_1_w16, svuint32x2_t,
+ z18 = svread_hor_za32_u32_vg2 (1, w16),
+ z18 = svread_hor_za32_u32_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.s - z3\.s}, za0h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x4_t,
+ z0 = svread_hor_za32_s32_vg4 (0, 0),
+ z0 = svread_hor_za32_s32_vg4 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.s - z7\.s}, za1h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x4_t,
+ z4 = svread_hor_za32_u32_vg4 (1, 1),
+ z4 = svread_hor_za32_u32_vg4 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.s - z31\.s}, za2h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x4_t,
+ z28 = svread_hor_za32_f32_vg4 (2, w11),
+ z28 = svread_hor_za32_f32_vg4 (2, w11))
+
+/*
+** read_za32_s32_z0_3_w12:
+** mova {z0\.s - z3\.s}, za3h\.s\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_3_w12, svint32x4_t,
+ z0 = svread_hor_za32_s32_vg4 (3, w12),
+ z0 = svread_hor_za32_s32_vg4 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+** mova {[^\n]+}, za0h\.s\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x4_t,
+ z18 = svread_hor_za32_u32_vg4 (0, w15),
+ z18 = svread_hor_za32_u32_vg4 (0, w15))
+
+/*
+** read_za32_f32_z23_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {[^\n]+}, za1h\.s\[\1, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z23_1_w12p4, svfloat32x4_t,
+ z23 = svread_hor_za32_f32_vg4 (1, w12 + 4),
+ z23 = svread_hor_za32_f32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_u32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.s - z7\.s}, za2h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_2_w12p1, svuint32x4_t,
+ z4 = svread_hor_za32_u32_vg4 (2, w12 + 1),
+ z4 = svread_hor_za32_u32_vg4 (2, w12 + 1))
+
+/*
+** read_za32_s32_z28_3_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.s - z31\.s}, za3h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z28_3_w12p2, svint32x4_t,
+ z28 = svread_hor_za32_s32_vg4 (3, w12 + 2),
+ z28 = svread_hor_za32_s32_vg4 (3, w12 + 2))
+
+/*
+** read_za32_f32_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.s - z3\.s}, za0h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_0_w15p3, svfloat32x4_t,
+ z0 = svread_hor_za32_f32_vg4 (0, w15 + 3),
+ z0 = svread_hor_za32_f32_vg4 (0, w15 + 3))
+
+/*
+** read_za32_u32_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {z28\.s - z31\.s}, za1h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_1_w12p4, svuint32x4_t,
+ z28 = svread_hor_za32_u32_vg4 (1, w12 + 4),
+ z28 = svread_hor_za32_u32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_f32_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.s - z7\.s}, za2h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12m1, svfloat32x4_t,
+ z4 = svread_hor_za32_f32_vg4 (2, w12 - 1),
+ z4 = svread_hor_za32_f32_vg4 (2, w12 - 1))
+
+/*
+** read_za32_u32_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.s - z31\.s}, za3h\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_3_w16, svuint32x4_t,
+ z28 = svread_hor_za32_u32_vg4 (3, w16),
+ z28 = svread_hor_za32_u32_vg4 (3, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.d - z1\.d}, za0h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x2_t,
+ z0 = svread_hor_za64_s64_vg2 (0, 0),
+ z0 = svread_hor_za64_s64_vg2 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.d - z5\.d}, za1h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x2_t,
+ z4 = svread_hor_za64_u64_vg2 (1, 1),
+ z4 = svread_hor_za64_u64_vg2 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.d - z29\.d}, za2h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x2_t,
+ z28 = svread_hor_za64_f64_vg2 (2, w11),
+ z28 = svread_hor_za64_f64_vg2 (2, w11))
+
+/*
+** read_za64_f64_z0_3_w12:
+** mova {z0\.d - z1\.d}, za3h\.d\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_3_w12, svfloat64x2_t,
+ z0 = svread_hor_za64_f64_vg2 (3, w12),
+ z0 = svread_hor_za64_f64_vg2 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+** mova {z18\.d - z19\.d}, za4h\.d\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x2_t,
+ z18 = svread_hor_za64_u64_vg2 (4, w15),
+ z18 = svread_hor_za64_u64_vg2 (4, w15))
+
+/*
+** read_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {[^\n]+}, za5h\.d\[\1, 0:1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z23_5_w12p2, svint64x2_t,
+ z23 = svread_hor_za64_s64_vg2 (5, w12 + 2),
+ z23 = svread_hor_za64_s64_vg2 (5, w12 + 2))
+
+/*
+** read_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.d - z5\.d}, za6h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ z4 = svread_hor_za64_f64_vg2 (6, w12 + 1),
+ z4 = svread_hor_za64_f64_vg2 (6, w12 + 1))
+
+/*
+** read_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.d - z1\.d}, za7h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z0_7_w15p3, svuint64x2_t,
+ z0 = svread_hor_za64_u64_vg2 (7, w15 + 3),
+ z0 = svread_hor_za64_u64_vg2 (7, w15 + 3))
+
+/*
+** read_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova {z0\.d - z1\.d}, za1h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_1_w15p4, svint64x2_t,
+ z0 = svread_hor_za64_s64_vg2 (1, w15 + 4),
+ z0 = svread_hor_za64_s64_vg2 (1, w15 + 4))
+
+/*
+** read_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.d - z5\.d}, za3h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_3_w12m1, svuint64x2_t,
+ z4 = svread_hor_za64_u64_vg2 (3, w12 - 1),
+ z4 = svread_hor_za64_u64_vg2 (3, w12 - 1))
+
+/*
+** read_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.d - z19\.d}, za1h\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_1_w16, svuint64x2_t,
+ z18 = svread_hor_za64_u64_vg2 (1, w16),
+ z18 = svread_hor_za64_u64_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.d - z3\.d}, za0h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x4_t,
+ z0 = svread_hor_za64_s64_vg4 (0, 0),
+ z0 = svread_hor_za64_s64_vg4 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.d - z7\.d}, za1h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x4_t,
+ z4 = svread_hor_za64_u64_vg4 (1, 1),
+ z4 = svread_hor_za64_u64_vg4 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.d - z31\.d}, za2h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x4_t,
+ z28 = svread_hor_za64_f64_vg4 (2, w11),
+ z28 = svread_hor_za64_f64_vg4 (2, w11))
+
+/*
+** read_za64_s64_z0_3_w12:
+** mova {z0\.d - z3\.d}, za3h\.d\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_3_w12, svint64x4_t,
+ z0 = svread_hor_za64_s64_vg4 (3, w12),
+ z0 = svread_hor_za64_s64_vg4 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+** mova {[^\n]+}, za4h\.d\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x4_t,
+ z18 = svread_hor_za64_u64_vg4 (4, w15),
+ z18 = svread_hor_za64_u64_vg4 (4, w15))
+
+/*
+** read_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {[^\n]+}, za5h\.d\[\1, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ z23 = svread_hor_za64_f64_vg4 (5, w12 + 4),
+ z23 = svread_hor_za64_f64_vg4 (5, w12 + 4))
+
+/*
+** read_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.d - z7\.d}, za6h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_6_w12p1, svuint64x4_t,
+ z4 = svread_hor_za64_u64_vg4 (6, w12 + 1),
+ z4 = svread_hor_za64_u64_vg4 (6, w12 + 1))
+
+/*
+** read_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.d - z31\.d}, za7h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z28_7_w12p2, svint64x4_t,
+ z28 = svread_hor_za64_s64_vg4 (7, w12 + 2),
+ z28 = svread_hor_za64_s64_vg4 (7, w12 + 2))
+
+/*
+** read_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.d - z3\.d}, za0h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ z0 = svread_hor_za64_f64_vg4 (0, w15 + 3),
+ z0 = svread_hor_za64_f64_vg4 (0, w15 + 3))
+
+/*
+** read_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {z28\.d - z31\.d}, za1h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_1_w12p4, svuint64x4_t,
+ z28 = svread_hor_za64_u64_vg4 (1, w12 + 4),
+ z28 = svread_hor_za64_u64_vg4 (1, w12 + 4))
+
+/*
+** read_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.d - z7\.d}, za2h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ z4 = svread_hor_za64_f64_vg4 (2, w12 - 1),
+ z4 = svread_hor_za64_f64_vg4 (2, w12 - 1))
+
+/*
+** read_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.d - z31\.d}, za3h\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_3_w16, svuint64x4_t,
+ z28 = svread_hor_za64_u64_vg4 (3, w16),
+ z28 = svread_hor_za64_u64_vg4 (3, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x2_t,
+ z0 = svread_hor_za8_s8_vg2 (0, 0),
+ z0 = svread_hor_za8_s8_vg2 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,
+ z4 = svread_hor_za8_u8_vg2 (0, 1),
+ z4 = svread_hor_za8_u8_vg2 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x2_t,
+ z28 = svread_hor_za8_s8_vg2 (0, w11),
+ z28 = svread_hor_za8_s8_vg2 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+** mova {z0\.b - z1\.b}, za0h\.b\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x2_t,
+ z0 = svread_hor_za8_s8_vg2 (0, w12),
+ z0 = svread_hor_za8_s8_vg2 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+** mova {z18\.b - z19\.b}, za0h\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,
+ z18 = svread_hor_za8_u8_vg2 (0, w15),
+ z18 = svread_hor_za8_u8_vg2 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p14:
+** mova {[^\n]+}, za0h\.b\[w12, 14:15\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p14, svint8x2_t,
+ z23 = svread_hor_za8_s8_vg2 (0, w12 + 14),
+ z23 = svread_hor_za8_s8_vg2 (0, w12 + 14))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,
+ z4 = svread_hor_za8_u8_vg2 (0, w12 + 1),
+ z4 = svread_hor_za8_u8_vg2 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+** mova {z28\.b - z29\.b}, za0h\.b\[w12, 2:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x2_t,
+ z28 = svread_hor_za8_s8_vg2 (0, w12 + 2),
+ z28 = svread_hor_za8_s8_vg2 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,
+ z0 = svread_hor_za8_u8_vg2 (0, w15 + 3),
+ z0 = svread_hor_za8_u8_vg2 (0, w15 + 3))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+** mova {z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,
+ z4 = svread_hor_za8_u8_vg2 (0, w15 + 12),
+ z4 = svread_hor_za8_u8_vg2 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova {z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,
+ z28 = svread_hor_za8_u8_vg2 (0, w12 + 15),
+ z28 = svread_hor_za8_u8_vg2 (0, w12 + 15))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova {z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x2_t,
+ z0 = svread_hor_za8_s8_vg2 (0, w15 + 16),
+ z0 = svread_hor_za8_s8_vg2 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,
+ z4 = svread_hor_za8_u8_vg2 (0, w12 - 1),
+ z4 = svread_hor_za8_u8_vg2 (0, w12 - 1))
+
+/*
+** read_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.b - z19\.b}, za0h\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
+ z18 = svread_hor_za8_u8_vg2 (0, w16),
+ z18 = svread_hor_za8_u8_vg2 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x4_t,
+ z0 = svread_hor_za8_s8_vg4 (0, 0),
+ z0 = svread_hor_za8_s8_vg4 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,
+ z4 = svread_hor_za8_u8_vg4 (0, 1),
+ z4 = svread_hor_za8_u8_vg4 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x4_t,
+ z28 = svread_hor_za8_s8_vg4 (0, w11),
+ z28 = svread_hor_za8_s8_vg4 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+** mova {z0\.b - z3\.b}, za0h\.b\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x4_t,
+ z0 = svread_hor_za8_s8_vg4 (0, w12),
+ z0 = svread_hor_za8_s8_vg4 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+** mova {[^\n]+}, za0h\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,
+ z18 = svread_hor_za8_u8_vg4 (0, w15),
+ z18 = svread_hor_za8_u8_vg4 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p12:
+** mova {[^\n]+}, za0h\.b\[w12, 12:15\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p12, svint8x4_t,
+ z23 = svread_hor_za8_s8_vg4 (0, w12 + 12),
+ z23 = svread_hor_za8_s8_vg4 (0, w12 + 12))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,
+ z4 = svread_hor_za8_u8_vg4 (0, w12 + 1),
+ z4 = svread_hor_za8_u8_vg4 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x4_t,
+ z28 = svread_hor_za8_s8_vg4 (0, w12 + 2),
+ z28 = svread_hor_za8_s8_vg4 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,
+ z0 = svread_hor_za8_u8_vg4 (0, w15 + 3),
+ z0 = svread_hor_za8_u8_vg4 (0, w15 + 3))
+
+/*
+** read_za8_u8_z0_0_w12p4:
+** mova {z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,
+ z0 = svread_hor_za8_u8_vg4 (0, w12 + 4),
+ z0 = svread_hor_za8_u8_vg4 (0, w12 + 4))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+** mova {z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,
+ z4 = svread_hor_za8_u8_vg4 (0, w15 + 12),
+ z4 = svread_hor_za8_u8_vg4 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,
+ z28 = svread_hor_za8_u8_vg4 (0, w12 + 14),
+ z28 = svread_hor_za8_u8_vg4 (0, w12 + 14))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova {z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x4_t,
+ z0 = svread_hor_za8_s8_vg4 (0, w15 + 16),
+ z0 = svread_hor_za8_s8_vg4 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,
+ z4 = svread_hor_za8_u8_vg4 (0, w12 - 1),
+ z4 = svread_hor_za8_u8_vg4 (0, w12 - 1))
+
+/*
+** read_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
+ z28 = svread_hor_za8_u8_vg4 (0, w16),
+ z28 = svread_hor_za8_u8_vg4 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.h - z1\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x2_t,
+ z0 = svread_ver_za16_s16_vg2 (0, 0),
+ z0 = svread_ver_za16_s16_vg2 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.h - z5\.h}, za1v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x2_t,
+ z4 = svread_ver_za16_u16_vg2 (1, 1),
+ z4 = svread_ver_za16_u16_vg2 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.h - z29\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x2_t,
+ z28 = svread_ver_za16_f16_vg2 (0, w11),
+ z28 = svread_ver_za16_f16_vg2 (0, w11))
+
+/*
+** read_za16_bf16_z0_1_w12:
+** mova {z0\.h - z1\.h}, za1v\.h\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z0_1_w12, svbfloat16x2_t,
+ z0 = svread_ver_za16_bf16_vg2 (1, w12),
+ z0 = svread_ver_za16_bf16_vg2 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+** mova {z18\.h - z19\.h}, za0v\.h\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x2_t,
+ z18 = svread_ver_za16_u16_vg2 (0, w15),
+ z18 = svread_ver_za16_u16_vg2 (0, w15))
+
+/*
+** read_za16_s16_z23_1_w12p6:
+** mova {[^\n]+}, za1v\.h\[w12, 6:7\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z23_1_w12p6, svint16x2_t,
+ z23 = svread_ver_za16_s16_vg2 (1, w12 + 6),
+ z23 = svread_ver_za16_s16_vg2 (1, w12 + 6))
+
+/*
+** read_za16_f16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.h - z5\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z4_0_w12p1, svfloat16x2_t,
+ z4 = svread_ver_za16_f16_vg2 (0, w12 + 1),
+ z4 = svread_ver_za16_f16_vg2 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+** mova {z28\.h - z29\.h}, za1v\.h\[w12, 2:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x2_t,
+ z28 = svread_ver_za16_s16_vg2 (1, w12 + 2),
+ z28 = svread_ver_za16_s16_vg2 (1, w12 + 2))
+
+/*
+** read_za16_u16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.h - z1\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z0_0_w15p3, svuint16x2_t,
+ z0 = svread_ver_za16_u16_vg2 (0, w15 + 3),
+ z0 = svread_ver_za16_u16_vg2 (0, w15 + 3))
+
+/*
+** read_za16_bf16_z4_1_w15p4:
+** mova {z4\.h - z5\.h}, za1v\.h\[w15, 4:5\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+ z4 = svread_ver_za16_bf16_vg2 (1, w15 + 4),
+ z4 = svread_ver_za16_bf16_vg2 (1, w15 + 4))
+
+/*
+** read_za16_u16_z28_0_w12p7:
+** add (w[0-9]+), w12, #?7
+** mova {z28\.h - z29\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w12p7, svuint16x2_t,
+ z28 = svread_ver_za16_u16_vg2 (0, w12 + 7),
+ z28 = svread_ver_za16_u16_vg2 (0, w12 + 7))
+
+/*
+** read_za16_s16_z0_1_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova {z0\.h - z1\.h}, za1v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w15p8, svint16x2_t,
+ z0 = svread_ver_za16_s16_vg2 (1, w15 + 8),
+ z0 = svread_ver_za16_s16_vg2 (1, w15 + 8))
+
+/*
+** read_za16_u16_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.h - z5\.h}, za0v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12m1, svuint16x2_t,
+ z4 = svread_ver_za16_u16_vg2 (0, w12 - 1),
+ z4 = svread_ver_za16_u16_vg2 (0, w12 - 1))
+
+/*
+** read_za16_u16_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.h - z19\.h}, za1v\.h\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_1_w16, svuint16x2_t,
+ z18 = svread_ver_za16_u16_vg2 (1, w16),
+ z18 = svread_ver_za16_u16_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x4_t,
+ z0 = svread_ver_za16_s16_vg4 (0, 0),
+ z0 = svread_ver_za16_s16_vg4 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.h - z7\.h}, za1v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x4_t,
+ z4 = svread_ver_za16_u16_vg4 (1, 1),
+ z4 = svread_ver_za16_u16_vg4 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.h - z31\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x4_t,
+ z28 = svread_ver_za16_f16_vg4 (0, w11),
+ z28 = svread_ver_za16_f16_vg4 (0, w11))
+
+/*
+** read_za16_s16_z0_1_w12:
+** mova {z0\.h - z3\.h}, za1v\.h\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w12, svint16x4_t,
+ z0 = svread_ver_za16_s16_vg4 (1, w12),
+ z0 = svread_ver_za16_s16_vg4 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+** mova {[^\n]+}, za0v\.h\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x4_t,
+ z18 = svread_ver_za16_u16_vg4 (0, w15),
+ z18 = svread_ver_za16_u16_vg4 (0, w15))
+
+/*
+** read_za16_bf16_z23_1_w12p4:
+** mova {[^\n]+}, za1v\.h\[w12, 4:7\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+ z23 = svread_ver_za16_bf16_vg4 (1, w12 + 4),
+ z23 = svread_ver_za16_bf16_vg4 (1, w12 + 4))
+
+/*
+** read_za16_u16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.h - z7\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12p1, svuint16x4_t,
+ z4 = svread_ver_za16_u16_vg4 (0, w12 + 1),
+ z4 = svread_ver_za16_u16_vg4 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.h - z31\.h}, za1v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x4_t,
+ z28 = svread_ver_za16_s16_vg4 (1, w12 + 2),
+ z28 = svread_ver_za16_s16_vg4 (1, w12 + 2))
+
+/*
+** read_za16_f16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z0_0_w15p3, svfloat16x4_t,
+ z0 = svread_ver_za16_f16_vg4 (0, w15 + 3),
+ z0 = svread_ver_za16_f16_vg4 (0, w15 + 3))
+
+/*
+** read_za16_u16_z28_1_w12p6:
+** add (w[0-9]+), w12, #?6
+** mova {z28\.h - z31\.h}, za1v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_1_w12p6, svuint16x4_t,
+ z28 = svread_ver_za16_u16_vg4 (1, w12 + 6),
+ z28 = svread_ver_za16_u16_vg4 (1, w12 + 6))
+
+/*
+** read_za16_s16_z0_0_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova {z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_w15p8, svint16x4_t,
+ z0 = svread_ver_za16_s16_vg4 (0, w15 + 8),
+ z0 = svread_ver_za16_s16_vg4 (0, w15 + 8))
+
+/*
+** read_za16_bf16_z4_1_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.h - z7\.h}, za1v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+ z4 = svread_ver_za16_bf16_vg4 (1, w12 - 1),
+ z4 = svread_ver_za16_bf16_vg4 (1, w12 - 1))
+
+/*
+** read_za16_u16_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.h - z31\.h}, za0v\.h\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w16, svuint16x4_t,
+ z28 = svread_ver_za16_u16_vg4 (0, w16),
+ z28 = svread_ver_za16_u16_vg4 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.s - z1\.s}, za0v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x2_t,
+ z0 = svread_ver_za32_s32_vg2 (0, 0),
+ z0 = svread_ver_za32_s32_vg2 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.s - z5\.s}, za1v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x2_t,
+ z4 = svread_ver_za32_u32_vg2 (1, 1),
+ z4 = svread_ver_za32_u32_vg2 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.s - z29\.s}, za2v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x2_t,
+ z28 = svread_ver_za32_f32_vg2 (2, w11),
+ z28 = svread_ver_za32_f32_vg2 (2, w11))
+
+/*
+** read_za32_f32_z0_3_w12:
+** mova {z0\.s - z1\.s}, za3v\.s\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_3_w12, svfloat32x2_t,
+ z0 = svread_ver_za32_f32_vg2 (3, w12),
+ z0 = svread_ver_za32_f32_vg2 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+** mova {z18\.s - z19\.s}, za0v\.s\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x2_t,
+ z18 = svread_ver_za32_u32_vg2 (0, w15),
+ z18 = svread_ver_za32_u32_vg2 (0, w15))
+
+/*
+** read_za32_s32_z23_1_w12p2:
+** mova {[^\n]+}, za1v\.s\[w12, 2:3\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z23_1_w12p2, svint32x2_t,
+ z23 = svread_ver_za32_s32_vg2 (1, w12 + 2),
+ z23 = svread_ver_za32_s32_vg2 (1, w12 + 2))
+
+/*
+** read_za32_f32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.s - z5\.s}, za2v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12p1, svfloat32x2_t,
+ z4 = svread_ver_za32_f32_vg2 (2, w12 + 1),
+ z4 = svread_ver_za32_f32_vg2 (2, w12 + 1))
+
+/*
+** read_za32_u32_z0_3_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.s - z1\.s}, za3v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z0_3_w15p3, svuint32x2_t,
+ z0 = svread_ver_za32_u32_vg2 (3, w15 + 3),
+ z0 = svread_ver_za32_u32_vg2 (3, w15 + 3))
+
+/*
+** read_za32_s32_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova {z0\.s - z1\.s}, za1v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_1_w15p4, svint32x2_t,
+ z0 = svread_ver_za32_s32_vg2 (1, w15 + 4),
+ z0 = svread_ver_za32_s32_vg2 (1, w15 + 4))
+
+/*
+** read_za32_u32_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.s - z5\.s}, za3v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_3_w12m1, svuint32x2_t,
+ z4 = svread_ver_za32_u32_vg2 (3, w12 - 1),
+ z4 = svread_ver_za32_u32_vg2 (3, w12 - 1))
+
+/*
+** read_za32_u32_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.s - z19\.s}, za1v\.s\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_1_w16, svuint32x2_t,
+ z18 = svread_ver_za32_u32_vg2 (1, w16),
+ z18 = svread_ver_za32_u32_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.s - z3\.s}, za0v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x4_t,
+ z0 = svread_ver_za32_s32_vg4 (0, 0),
+ z0 = svread_ver_za32_s32_vg4 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.s - z7\.s}, za1v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x4_t,
+ z4 = svread_ver_za32_u32_vg4 (1, 1),
+ z4 = svread_ver_za32_u32_vg4 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.s - z31\.s}, za2v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x4_t,
+ z28 = svread_ver_za32_f32_vg4 (2, w11),
+ z28 = svread_ver_za32_f32_vg4 (2, w11))
+
+/*
+** read_za32_s32_z0_3_w12:
+** mova {z0\.s - z3\.s}, za3v\.s\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_3_w12, svint32x4_t,
+ z0 = svread_ver_za32_s32_vg4 (3, w12),
+ z0 = svread_ver_za32_s32_vg4 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+** mova {[^\n]+}, za0v\.s\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x4_t,
+ z18 = svread_ver_za32_u32_vg4 (0, w15),
+ z18 = svread_ver_za32_u32_vg4 (0, w15))
+
+/*
+** read_za32_f32_z23_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {[^\n]+}, za1v\.s\[\1, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z23_1_w12p4, svfloat32x4_t,
+ z23 = svread_ver_za32_f32_vg4 (1, w12 + 4),
+ z23 = svread_ver_za32_f32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_u32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.s - z7\.s}, za2v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_2_w12p1, svuint32x4_t,
+ z4 = svread_ver_za32_u32_vg4 (2, w12 + 1),
+ z4 = svread_ver_za32_u32_vg4 (2, w12 + 1))
+
+/*
+** read_za32_s32_z28_3_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.s - z31\.s}, za3v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z28_3_w12p2, svint32x4_t,
+ z28 = svread_ver_za32_s32_vg4 (3, w12 + 2),
+ z28 = svread_ver_za32_s32_vg4 (3, w12 + 2))
+
+/*
+** read_za32_f32_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.s - z3\.s}, za0v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_0_w15p3, svfloat32x4_t,
+ z0 = svread_ver_za32_f32_vg4 (0, w15 + 3),
+ z0 = svread_ver_za32_f32_vg4 (0, w15 + 3))
+
+/*
+** read_za32_u32_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {z28\.s - z31\.s}, za1v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_1_w12p4, svuint32x4_t,
+ z28 = svread_ver_za32_u32_vg4 (1, w12 + 4),
+ z28 = svread_ver_za32_u32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_f32_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.s - z7\.s}, za2v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12m1, svfloat32x4_t,
+ z4 = svread_ver_za32_f32_vg4 (2, w12 - 1),
+ z4 = svread_ver_za32_f32_vg4 (2, w12 - 1))
+
+/*
+** read_za32_u32_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.s - z31\.s}, za3v\.s\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_3_w16, svuint32x4_t,
+ z28 = svread_ver_za32_u32_vg4 (3, w16),
+ z28 = svread_ver_za32_u32_vg4 (3, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.d - z1\.d}, za0v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x2_t,
+ z0 = svread_ver_za64_s64_vg2 (0, 0),
+ z0 = svread_ver_za64_s64_vg2 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.d - z5\.d}, za1v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x2_t,
+ z4 = svread_ver_za64_u64_vg2 (1, 1),
+ z4 = svread_ver_za64_u64_vg2 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.d - z29\.d}, za2v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x2_t,
+ z28 = svread_ver_za64_f64_vg2 (2, w11),
+ z28 = svread_ver_za64_f64_vg2 (2, w11))
+
+/*
+** read_za64_f64_z0_3_w12:
+** mova {z0\.d - z1\.d}, za3v\.d\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_3_w12, svfloat64x2_t,
+ z0 = svread_ver_za64_f64_vg2 (3, w12),
+ z0 = svread_ver_za64_f64_vg2 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+** mova {z18\.d - z19\.d}, za4v\.d\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x2_t,
+ z18 = svread_ver_za64_u64_vg2 (4, w15),
+ z18 = svread_ver_za64_u64_vg2 (4, w15))
+
+/*
+** read_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {[^\n]+}, za5v\.d\[\1, 0:1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z23_5_w12p2, svint64x2_t,
+ z23 = svread_ver_za64_s64_vg2 (5, w12 + 2),
+ z23 = svread_ver_za64_s64_vg2 (5, w12 + 2))
+
+/*
+** read_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.d - z5\.d}, za6v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ z4 = svread_ver_za64_f64_vg2 (6, w12 + 1),
+ z4 = svread_ver_za64_f64_vg2 (6, w12 + 1))
+
+/*
+** read_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.d - z1\.d}, za7v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z0_7_w15p3, svuint64x2_t,
+ z0 = svread_ver_za64_u64_vg2 (7, w15 + 3),
+ z0 = svread_ver_za64_u64_vg2 (7, w15 + 3))
+
+/*
+** read_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova {z0\.d - z1\.d}, za1v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_1_w15p4, svint64x2_t,
+ z0 = svread_ver_za64_s64_vg2 (1, w15 + 4),
+ z0 = svread_ver_za64_s64_vg2 (1, w15 + 4))
+
+/*
+** read_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.d - z5\.d}, za3v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_3_w12m1, svuint64x2_t,
+ z4 = svread_ver_za64_u64_vg2 (3, w12 - 1),
+ z4 = svread_ver_za64_u64_vg2 (3, w12 - 1))
+
+/*
+** read_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.d - z19\.d}, za1v\.d\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_1_w16, svuint64x2_t,
+ z18 = svread_ver_za64_u64_vg2 (1, w16),
+ z18 = svread_ver_za64_u64_vg2 (1, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.d - z3\.d}, za0v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x4_t,
+ z0 = svread_ver_za64_s64_vg4 (0, 0),
+ z0 = svread_ver_za64_s64_vg4 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.d - z7\.d}, za1v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x4_t,
+ z4 = svread_ver_za64_u64_vg4 (1, 1),
+ z4 = svread_ver_za64_u64_vg4 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.d - z31\.d}, za2v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x4_t,
+ z28 = svread_ver_za64_f64_vg4 (2, w11),
+ z28 = svread_ver_za64_f64_vg4 (2, w11))
+
+/*
+** read_za64_s64_z0_3_w12:
+** mova {z0\.d - z3\.d}, za3v\.d\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_3_w12, svint64x4_t,
+ z0 = svread_ver_za64_s64_vg4 (3, w12),
+ z0 = svread_ver_za64_s64_vg4 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+** mova {[^\n]+}, za4v\.d\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x4_t,
+ z18 = svread_ver_za64_u64_vg4 (4, w15),
+ z18 = svread_ver_za64_u64_vg4 (4, w15))
+
+/*
+** read_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {[^\n]+}, za5v\.d\[\1, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ z23 = svread_ver_za64_f64_vg4 (5, w12 + 4),
+ z23 = svread_ver_za64_f64_vg4 (5, w12 + 4))
+
+/*
+** read_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.d - z7\.d}, za6v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_6_w12p1, svuint64x4_t,
+ z4 = svread_ver_za64_u64_vg4 (6, w12 + 1),
+ z4 = svread_ver_za64_u64_vg4 (6, w12 + 1))
+
+/*
+** read_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.d - z31\.d}, za7v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z28_7_w12p2, svint64x4_t,
+ z28 = svread_ver_za64_s64_vg4 (7, w12 + 2),
+ z28 = svread_ver_za64_s64_vg4 (7, w12 + 2))
+
+/*
+** read_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.d - z3\.d}, za0v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ z0 = svread_ver_za64_f64_vg4 (0, w15 + 3),
+ z0 = svread_ver_za64_f64_vg4 (0, w15 + 3))
+
+/*
+** read_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova {z28\.d - z31\.d}, za1v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_1_w12p4, svuint64x4_t,
+ z28 = svread_ver_za64_u64_vg4 (1, w12 + 4),
+ z28 = svread_ver_za64_u64_vg4 (1, w12 + 4))
+
+/*
+** read_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.d - z7\.d}, za2v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ z4 = svread_ver_za64_f64_vg4 (2, w12 - 1),
+ z4 = svread_ver_za64_f64_vg4 (2, w12 - 1))
+
+/*
+** read_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.d - z31\.d}, za3v\.d\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_3_w16, svuint64x4_t,
+ z28 = svread_ver_za64_u64_vg4 (3, w16),
+ z28 = svread_ver_za64_u64_vg4 (3, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x2_t,
+ z0 = svread_ver_za8_s8_vg2 (0, 0),
+ z0 = svread_ver_za8_s8_vg2 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,
+ z4 = svread_ver_za8_u8_vg2 (0, 1),
+ z4 = svread_ver_za8_u8_vg2 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x2_t,
+ z28 = svread_ver_za8_s8_vg2 (0, w11),
+ z28 = svread_ver_za8_s8_vg2 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+** mova {z0\.b - z1\.b}, za0v\.b\[w12, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x2_t,
+ z0 = svread_ver_za8_s8_vg2 (0, w12),
+ z0 = svread_ver_za8_s8_vg2 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+** mova {z18\.b - z19\.b}, za0v\.b\[w15, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,
+ z18 = svread_ver_za8_u8_vg2 (0, w15),
+ z18 = svread_ver_za8_u8_vg2 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p14:
+** mova {[^\n]+}, za0v\.b\[w12, 14:15\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p14, svint8x2_t,
+ z23 = svread_ver_za8_s8_vg2 (0, w12 + 14),
+ z23 = svread_ver_za8_s8_vg2 (0, w12 + 14))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,
+ z4 = svread_ver_za8_u8_vg2 (0, w12 + 1),
+ z4 = svread_ver_za8_u8_vg2 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+** mova {z28\.b - z29\.b}, za0v\.b\[w12, 2:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x2_t,
+ z28 = svread_ver_za8_s8_vg2 (0, w12 + 2),
+ z28 = svread_ver_za8_s8_vg2 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,
+ z0 = svread_ver_za8_u8_vg2 (0, w15 + 3),
+ z0 = svread_ver_za8_u8_vg2 (0, w15 + 3))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+** mova {z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,
+ z4 = svread_ver_za8_u8_vg2 (0, w15 + 12),
+ z4 = svread_ver_za8_u8_vg2 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova {z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,
+ z28 = svread_ver_za8_u8_vg2 (0, w12 + 15),
+ z28 = svread_ver_za8_u8_vg2 (0, w12 + 15))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova {z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x2_t,
+ z0 = svread_ver_za8_s8_vg2 (0, w15 + 16),
+ z0 = svread_ver_za8_s8_vg2 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,
+ z4 = svread_ver_za8_u8_vg2 (0, w12 - 1),
+ z4 = svread_ver_za8_u8_vg2 (0, w12 - 1))
+
+/*
+** read_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova {z18\.b - z19\.b}, za0v\.b\[\1, 0:1\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
+ z18 = svread_ver_za8_u8_vg2 (0, w16),
+ z18 = svread_ver_za8_u8_vg2 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x4_t,
+ z0 = svread_ver_za8_s8_vg4 (0, 0),
+ z0 = svread_ver_za8_s8_vg4 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,
+ z4 = svread_ver_za8_u8_vg4 (0, 1),
+ z4 = svread_ver_za8_u8_vg4 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x4_t,
+ z28 = svread_ver_za8_s8_vg4 (0, w11),
+ z28 = svread_ver_za8_s8_vg4 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+** mova {z0\.b - z3\.b}, za0v\.b\[w12, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x4_t,
+ z0 = svread_ver_za8_s8_vg4 (0, w12),
+ z0 = svread_ver_za8_s8_vg4 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+** mova {[^\n]+}, za0v\.b\[w15, 0:3\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,
+ z18 = svread_ver_za8_u8_vg4 (0, w15),
+ z18 = svread_ver_za8_u8_vg4 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p12:
+** mova {[^\n]+}, za0v\.b\[w12, 12:15\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p12, svint8x4_t,
+ z23 = svread_ver_za8_s8_vg4 (0, w12 + 12),
+ z23 = svread_ver_za8_s8_vg4 (0, w12 + 12))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,
+ z4 = svread_ver_za8_u8_vg4 (0, w12 + 1),
+ z4 = svread_ver_za8_u8_vg4 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x4_t,
+ z28 = svread_ver_za8_s8_vg4 (0, w12 + 2),
+ z28 = svread_ver_za8_s8_vg4 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,
+ z0 = svread_ver_za8_u8_vg4 (0, w15 + 3),
+ z0 = svread_ver_za8_u8_vg4 (0, w15 + 3))
+
+/*
+** read_za8_u8_z0_0_w12p4:
+** mova {z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,
+ z0 = svread_ver_za8_u8_vg4 (0, w12 + 4),
+ z0 = svread_ver_za8_u8_vg4 (0, w12 + 4))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+** mova {z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,
+ z4 = svread_ver_za8_u8_vg4 (0, w15 + 12),
+ z4 = svread_ver_za8_u8_vg4 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,
+ z28 = svread_ver_za8_u8_vg4 (0, w12 + 14),
+ z28 = svread_ver_za8_u8_vg4 (0, w12 + 14))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova {z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x4_t,
+ z0 = svread_ver_za8_s8_vg4 (0, w15 + 16),
+ z0 = svread_ver_za8_s8_vg4 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova {z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,
+ z4 = svread_ver_za8_u8_vg4 (0, w12 - 1),
+ z4 = svread_ver_za8_u8_vg4 (0, w12 - 1))
+
+/*
+** read_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova {z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+** ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
+ z28 = svread_ver_za8_u8_vg4 (0, w16),
+ z28 = svread_ver_za8_u8_vg4 (0, w16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat16x2_t,
+ z0 = svread_za16_f16_vg1x2 (0),
+ z0 = svread_za16_f16_vg1x2 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint16x2_t,
+ z0 = svread_za16_s16_vg1x2 (w0),
+ z0 = svread_za16_s16_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint16x2_t,
+ z0 = svread_za16_u16_vg1x2 (w7),
+ z0 = svread_za16_u16_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svbfloat16x2_t,
+ z0 = svread_za16_bf16_vg1x2 (w8),
+ z0 = svread_za16_bf16_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint16x2_t,
+ z0 = svread_za16_s16_vg1x2 (w11),
+ z0 = svread_za16_s16_vg1x2 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint16x2_t,
+ z0 = svread_za16_u16_vg1x2 (w12),
+ z0 = svread_za16_u16_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat16x2_t,
+ z0 = svread_za16_f16_vg1x2 (w8 + 7),
+ z0 = svread_za16_f16_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint16x2_t,
+ z0 = svread_za16_s16_vg1x2 (w8 + 8),
+ z0 = svread_za16_s16_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint16x2_t,
+ z0 = svread_za16_u16_vg1x2 (w8 - 1),
+ z0 = svread_za16_u16_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat16x2_t,
+ z18 = svread_za16_f16_vg1x2 (w8),
+ z18 = svread_za16_f16_vg1x2 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx2\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint16x2_t,
+ z23 = svread_za16_s16_vg1x2 (w8),
+ z23 = svread_za16_s16_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svbfloat16x2_t,
+ z28 = svread_za16_bf16_vg1x2 (w8),
+ z28 = svread_za16_bf16_vg1x2 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint16x4_t,
+ z0 = svread_za16_s16_vg1x4 (0),
+ z0 = svread_za16_s16_vg1x4 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint16x4_t,
+ z0 = svread_za16_u16_vg1x4 (w0),
+ z0 = svread_za16_u16_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat16x4_t,
+ z0 = svread_za16_f16_vg1x4 (w7),
+ z0 = svread_za16_f16_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint16x4_t,
+ z0 = svread_za16_s16_vg1x4 (w8),
+ z0 = svread_za16_s16_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint16x4_t,
+ z0 = svread_za16_u16_vg1x4 (w11),
+ z0 = svread_za16_u16_vg1x4 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svbfloat16x4_t,
+ z0 = svread_za16_bf16_vg1x4 (w12),
+ z0 = svread_za16_bf16_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint16x4_t,
+ z0 = svread_za16_s16_vg1x4 (w8 + 7),
+ z0 = svread_za16_s16_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint16x4_t,
+ z0 = svread_za16_u16_vg1x4 (w8 + 8),
+ z0 = svread_za16_u16_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat16x4_t,
+ z0 = svread_za16_f16_vg1x4 (w8 - 1),
+ z0 = svread_za16_f16_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+** mova {z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint16x4_t,
+ z4 = svread_za16_s16_vg1x4 (w8),
+ z4 = svread_za16_s16_vg1x4 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z18:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint16x4_t,
+ z18 = svread_za16_u16_vg1x4 (w8),
+ z18 = svread_za16_u16_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svbfloat16x4_t,
+ z23 = svread_za16_bf16_vg1x4 (w8),
+ z23 = svread_za16_bf16_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint16x4_t,
+ z28 = svread_za16_s16_vg1x4 (w8),
+ z28 = svread_za16_s16_vg1x4 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat32x2_t,
+ z0 = svread_za32_f32_vg1x2 (0),
+ z0 = svread_za32_f32_vg1x2 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint32x2_t,
+ z0 = svread_za32_s32_vg1x2 (w0),
+ z0 = svread_za32_s32_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint32x2_t,
+ z0 = svread_za32_u32_vg1x2 (w7),
+ z0 = svread_za32_u32_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svfloat32x2_t,
+ z0 = svread_za32_f32_vg1x2 (w8),
+ z0 = svread_za32_f32_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint32x2_t,
+ z0 = svread_za32_s32_vg1x2 (w11),
+ z0 = svread_za32_s32_vg1x2 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint32x2_t,
+ z0 = svread_za32_u32_vg1x2 (w12),
+ z0 = svread_za32_u32_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat32x2_t,
+ z0 = svread_za32_f32_vg1x2 (w8 + 7),
+ z0 = svread_za32_f32_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint32x2_t,
+ z0 = svread_za32_s32_vg1x2 (w8 + 8),
+ z0 = svread_za32_s32_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint32x2_t,
+ z0 = svread_za32_u32_vg1x2 (w8 - 1),
+ z0 = svread_za32_u32_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat32x2_t,
+ z18 = svread_za32_f32_vg1x2 (w8),
+ z18 = svread_za32_f32_vg1x2 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx2\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint32x2_t,
+ z23 = svread_za32_s32_vg1x2 (w8),
+ z23 = svread_za32_s32_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint32x2_t,
+ z28 = svread_za32_u32_vg1x2 (w8),
+ z28 = svread_za32_u32_vg1x2 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint32x4_t,
+ z0 = svread_za32_s32_vg1x4 (0),
+ z0 = svread_za32_s32_vg1x4 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint32x4_t,
+ z0 = svread_za32_u32_vg1x4 (w0),
+ z0 = svread_za32_u32_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat32x4_t,
+ z0 = svread_za32_f32_vg1x4 (w7),
+ z0 = svread_za32_f32_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint32x4_t,
+ z0 = svread_za32_s32_vg1x4 (w8),
+ z0 = svread_za32_s32_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint32x4_t,
+ z0 = svread_za32_u32_vg1x4 (w11),
+ z0 = svread_za32_u32_vg1x4 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svfloat32x4_t,
+ z0 = svread_za32_f32_vg1x4 (w12),
+ z0 = svread_za32_f32_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint32x4_t,
+ z0 = svread_za32_s32_vg1x4 (w8 + 7),
+ z0 = svread_za32_s32_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint32x4_t,
+ z0 = svread_za32_u32_vg1x4 (w8 + 8),
+ z0 = svread_za32_u32_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat32x4_t,
+ z0 = svread_za32_f32_vg1x4 (w8 - 1),
+ z0 = svread_za32_f32_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+** mova {z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint32x4_t,
+ z4 = svread_za32_s32_vg1x4 (w8),
+ z4 = svread_za32_s32_vg1x4 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z18:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint32x4_t,
+ z18 = svread_za32_u32_vg1x4 (w8),
+ z18 = svread_za32_u32_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svfloat32x4_t,
+ z23 = svread_za32_f32_vg1x4 (w8),
+ z23 = svread_za32_f32_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint32x4_t,
+ z28 = svread_za32_s32_vg1x4 (w8),
+ z28 = svread_za32_s32_vg1x4 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat64x2_t,
+ z0 = svread_za64_f64_vg1x2 (0),
+ z0 = svread_za64_f64_vg1x2 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint64x2_t,
+ z0 = svread_za64_s64_vg1x2 (w0),
+ z0 = svread_za64_s64_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint64x2_t,
+ z0 = svread_za64_u64_vg1x2 (w7),
+ z0 = svread_za64_u64_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svfloat64x2_t,
+ z0 = svread_za64_f64_vg1x2 (w8),
+ z0 = svread_za64_f64_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint64x2_t,
+ z0 = svread_za64_s64_vg1x2 (w11),
+ z0 = svread_za64_s64_vg1x2 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint64x2_t,
+ z0 = svread_za64_u64_vg1x2 (w12),
+ z0 = svread_za64_u64_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat64x2_t,
+ z0 = svread_za64_f64_vg1x2 (w8 + 7),
+ z0 = svread_za64_f64_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint64x2_t,
+ z0 = svread_za64_s64_vg1x2 (w8 + 8),
+ z0 = svread_za64_s64_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint64x2_t,
+ z0 = svread_za64_u64_vg1x2 (w8 - 1),
+ z0 = svread_za64_u64_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat64x2_t,
+ z18 = svread_za64_f64_vg1x2 (w8),
+ z18 = svread_za64_f64_vg1x2 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx2\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint64x2_t,
+ z23 = svread_za64_s64_vg1x2 (w8),
+ z23 = svread_za64_s64_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint64x2_t,
+ z28 = svread_za64_u64_vg1x2 (w8),
+ z28 = svread_za64_u64_vg1x2 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint64x4_t,
+ z0 = svread_za64_s64_vg1x4 (0),
+ z0 = svread_za64_s64_vg1x4 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint64x4_t,
+ z0 = svread_za64_u64_vg1x4 (w0),
+ z0 = svread_za64_u64_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat64x4_t,
+ z0 = svread_za64_f64_vg1x4 (w7),
+ z0 = svread_za64_f64_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint64x4_t,
+ z0 = svread_za64_s64_vg1x4 (w8),
+ z0 = svread_za64_s64_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint64x4_t,
+ z0 = svread_za64_u64_vg1x4 (w11),
+ z0 = svread_za64_u64_vg1x4 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svfloat64x4_t,
+ z0 = svread_za64_f64_vg1x4 (w12),
+ z0 = svread_za64_f64_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint64x4_t,
+ z0 = svread_za64_s64_vg1x4 (w8 + 7),
+ z0 = svread_za64_s64_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint64x4_t,
+ z0 = svread_za64_u64_vg1x4 (w8 + 8),
+ z0 = svread_za64_u64_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat64x4_t,
+ z0 = svread_za64_f64_vg1x4 (w8 - 1),
+ z0 = svread_za64_f64_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+** mova {z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint64x4_t,
+ z4 = svread_za64_s64_vg1x4 (w8),
+ z4 = svread_za64_s64_vg1x4 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z18:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint64x4_t,
+ z18 = svread_za64_u64_vg1x4 (w8),
+ z18 = svread_za64_u64_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svfloat64x4_t,
+ z23 = svread_za64_f64_vg1x4 (w8),
+ z23 = svread_za64_f64_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint64x4_t,
+ z28 = svread_za64_s64_vg1x4 (w8),
+ z28 = svread_za64_s64_vg1x4 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (0),
+ z0 = svread_za8_s8_vg1x2 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (w0),
+ z0 = svread_za8_s8_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint8x2_t,
+ z0 = svread_za8_u8_vg1x2 (w7),
+ z0 = svread_za8_u8_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (w8),
+ z0 = svread_za8_s8_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (w11),
+ z0 = svread_za8_s8_vg1x2 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint8x2_t,
+ z0 = svread_za8_u8_vg1x2 (w12),
+ z0 = svread_za8_u8_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (w8 + 7),
+ z0 = svread_za8_s8_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint8x2_t,
+ z0 = svread_za8_s8_vg1x2 (w8 + 8),
+ z0 = svread_za8_s8_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint8x2_t,
+ z0 = svread_za8_u8_vg1x2 (w8 - 1),
+ z0 = svread_za8_u8_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+** mova {z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint8x2_t,
+ z18 = svread_za8_u8_vg1x2 (w8),
+ z18 = svread_za8_u8_vg1x2 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx2\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint8x2_t,
+ z23 = svread_za8_s8_vg1x2 (w8),
+ z23 = svread_za8_s8_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint8x2_t,
+ z28 = svread_za8_u8_vg1x2 (w8),
+ z28 = svread_za8_u8_vg1x2 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (0),
+ z0 = svread_za8_s8_vg1x4 (0))
+
+/*
+** read_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint8x4_t,
+ z0 = svread_za8_u8_vg1x4 (w0),
+ z0 = svread_za8_u8_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (w7),
+ z0 = svread_za8_s8_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (w8),
+ z0 = svread_za8_s8_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint8x4_t,
+ z0 = svread_za8_u8_vg1x4 (w11),
+ z0 = svread_za8_u8_vg1x4 (w11))
+
+/*
+** read_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (w12),
+ z0 = svread_za8_s8_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+** mova {z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (w8 + 7),
+ z0 = svread_za8_s8_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint8x4_t,
+ z0 = svread_za8_u8_vg1x4 (w8 + 8),
+ z0 = svread_za8_u8_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova {z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svint8x4_t,
+ z0 = svread_za8_s8_vg1x4 (w8 - 1),
+ z0 = svread_za8_s8_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+** mova {z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint8x4_t,
+ z4 = svread_za8_s8_vg1x4 (w8),
+ z4 = svread_za8_s8_vg1x4 (w8))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** read_w8_z18:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint8x4_t,
+ z18 = svread_za8_u8_vg1x4 (w8),
+ z18 = svread_za8_u8_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+** mova [^\n]+, za\.d\[w8, 0, vgx4\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svuint8x4_t,
+ z23 = svread_za8_u8_vg1x4 (w8),
+ z23 = svread_za8_u8_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+** mova {z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+** ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint8x4_t,
+ z28 = svread_za8_s8_vg1x4 (w8),
+ z28 = svread_za8_s8_vg1x4 (w8))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
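+/* Multi-vector FRINTA operands must be consecutive registers whose first
+   register number is a multiple of the tuple size, so tuples such as z23
+   below are shuffled through aligned registers with extra MOVs.  */
+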
+/*
+** rinta_z0_z0:
+** frinta {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rinta_z0_z0, svfloat32x2_t, z0,
+ svrinta_f32_x2 (z0),
+ svrinta (z0))
+
+/*
+** rinta_z0_z4:
+** frinta {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rinta_z0_z4, svfloat32x2_t, z0,
+ svrinta_f32_x2 (z4),
+ svrinta (z4))
+
+/*
+** rinta_z4_z18:
+** frinta {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (rinta_z4_z18, svfloat32x2_t, z4,
+ svrinta_f32_x2 (z18),
+ svrinta (z18))
+
+/*
+** rinta_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** frinta {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rinta_z18_z23, svfloat32x2_t, z18,
+ svrinta_f32_x2 (z23),
+ svrinta (z23))
+
+/*
+** rinta_z23_z28:
+** frinta [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rinta_z23_z28, svfloat32x2_t, z23,
+ svrinta_f32_x2 (z28),
+ svrinta (z28))
+
+/*
+** rinta_z28_z0:
+** frinta {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rinta_z28_z0, svfloat32x2_t, z28,
+ svrinta_f32_x2 (z0),
+ svrinta (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rinta_z0_z0:
+** frinta {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rinta_z0_z0, svfloat32x4_t, z0,
+ svrinta_f32_x4 (z0),
+ svrinta (z0))
+
+/*
+** rinta_z0_z4:
+** frinta {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rinta_z0_z4, svfloat32x4_t, z0,
+ svrinta_f32_x4 (z4),
+ svrinta (z4))
+
+/*
+** rinta_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frinta {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rinta_z4_z18, svfloat32x4_t, z4,
+ svrinta_f32_x4 (z18),
+ svrinta (z18))
+
+/*
+** rinta_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frinta {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rinta_z18_z23, svfloat32x4_t, z18,
+ svrinta_f32_x4 (z23),
+ svrinta (z23))
+
+/*
+** rinta_z23_z28:
+** frinta [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rinta_z23_z28, svfloat32x4_t, z23,
+ svrinta_f32_x4 (z28),
+ svrinta (z28))
+
+/*
+** rinta_z28_z0:
+** frinta {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rinta_z28_z0, svfloat32x4_t, z28,
+ svrinta_f32_x4 (z0),
+ svrinta (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintm_z0_z0:
+** frintm {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintm_z0_z0, svfloat32x2_t, z0,
+ svrintm_f32_x2 (z0),
+ svrintm (z0))
+
+/*
+** rintm_z0_z4:
+** frintm {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rintm_z0_z4, svfloat32x2_t, z0,
+ svrintm_f32_x2 (z4),
+ svrintm (z4))
+
+/*
+** rintm_z4_z18:
+** frintm {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (rintm_z4_z18, svfloat32x2_t, z4,
+ svrintm_f32_x2 (z18),
+ svrintm (z18))
+
+/*
+** rintm_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** frintm {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintm_z18_z23, svfloat32x2_t, z18,
+ svrintm_f32_x2 (z23),
+ svrintm (z23))
+
+/*
+** rintm_z23_z28:
+** frintm [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintm_z23_z28, svfloat32x2_t, z23,
+ svrintm_f32_x2 (z28),
+ svrintm (z28))
+
+/*
+** rintm_z28_z0:
+** frintm {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintm_z28_z0, svfloat32x2_t, z28,
+ svrintm_f32_x2 (z0),
+ svrintm (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintm_z0_z0:
+** frintm {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintm_z0_z0, svfloat32x4_t, z0,
+ svrintm_f32_x4 (z0),
+ svrintm (z0))
+
+/*
+** rintm_z0_z4:
+** frintm {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rintm_z0_z4, svfloat32x4_t, z0,
+ svrintm_f32_x4 (z4),
+ svrintm (z4))
+
+/*
+** rintm_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintm {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintm_z4_z18, svfloat32x4_t, z4,
+ svrintm_f32_x4 (z18),
+ svrintm (z18))
+
+/*
+** rintm_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintm {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintm_z18_z23, svfloat32x4_t, z18,
+ svrintm_f32_x4 (z23),
+ svrintm (z23))
+
+/*
+** rintm_z23_z28:
+** frintm [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintm_z23_z28, svfloat32x4_t, z23,
+ svrintm_f32_x4 (z28),
+ svrintm (z28))
+
+/*
+** rintm_z28_z0:
+** frintm {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintm_z28_z0, svfloat32x4_t, z28,
+ svrintm_f32_x4 (z0),
+ svrintm (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintn_z0_z0:
+** frintn {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintn_z0_z0, svfloat32x2_t, z0,
+ svrintn_f32_x2 (z0),
+ svrintn (z0))
+
+/*
+** rintn_z0_z4:
+** frintn {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rintn_z0_z4, svfloat32x2_t, z0,
+ svrintn_f32_x2 (z4),
+ svrintn (z4))
+
+/*
+** rintn_z4_z18:
+** frintn {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (rintn_z4_z18, svfloat32x2_t, z4,
+ svrintn_f32_x2 (z18),
+ svrintn (z18))
+
+/*
+** rintn_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** frintn {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintn_z18_z23, svfloat32x2_t, z18,
+ svrintn_f32_x2 (z23),
+ svrintn (z23))
+
+/*
+** rintn_z23_z28:
+** frintn [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintn_z23_z28, svfloat32x2_t, z23,
+ svrintn_f32_x2 (z28),
+ svrintn (z28))
+
+/*
+** rintn_z28_z0:
+** frintn {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintn_z28_z0, svfloat32x2_t, z28,
+ svrintn_f32_x2 (z0),
+ svrintn (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintn_z0_z0:
+** frintn {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintn_z0_z0, svfloat32x4_t, z0,
+ svrintn_f32_x4 (z0),
+ svrintn (z0))
+
+/*
+** rintn_z0_z4:
+** frintn {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rintn_z0_z4, svfloat32x4_t, z0,
+ svrintn_f32_x4 (z4),
+ svrintn (z4))
+
+/*
+** rintn_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintn {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintn_z4_z18, svfloat32x4_t, z4,
+ svrintn_f32_x4 (z18),
+ svrintn (z18))
+
+/*
+** rintn_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintn {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintn_z18_z23, svfloat32x4_t, z18,
+ svrintn_f32_x4 (z23),
+ svrintn (z23))
+
+/*
+** rintn_z23_z28:
+** frintn [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintn_z23_z28, svfloat32x4_t, z23,
+ svrintn_f32_x4 (z28),
+ svrintn (z28))
+
+/*
+** rintn_z28_z0:
+** frintn {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintn_z28_z0, svfloat32x4_t, z28,
+ svrintn_f32_x4 (z0),
+ svrintn (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintp_z0_z0:
+** frintp {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintp_z0_z0, svfloat32x2_t, z0,
+ svrintp_f32_x2 (z0),
+ svrintp (z0))
+
+/*
+** rintp_z0_z4:
+** frintp {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rintp_z0_z4, svfloat32x2_t, z0,
+ svrintp_f32_x2 (z4),
+ svrintp (z4))
+
+/*
+** rintp_z4_z18:
+** frintp {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (rintp_z4_z18, svfloat32x2_t, z4,
+ svrintp_f32_x2 (z18),
+ svrintp (z18))
+
+/*
+** rintp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** frintp {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintp_z18_z23, svfloat32x2_t, z18,
+ svrintp_f32_x2 (z23),
+ svrintp (z23))
+
+/*
+** rintp_z23_z28:
+** frintp [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintp_z23_z28, svfloat32x2_t, z23,
+ svrintp_f32_x2 (z28),
+ svrintp (z28))
+
+/*
+** rintp_z28_z0:
+** frintp {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rintp_z28_z0, svfloat32x2_t, z28,
+ svrintp_f32_x2 (z0),
+ svrintp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintp_z0_z0:
+** frintp {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintp_z0_z0, svfloat32x4_t, z0,
+ svrintp_f32_x4 (z0),
+ svrintp (z0))
+
+/*
+** rintp_z0_z4:
+** frintp {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rintp_z0_z4, svfloat32x4_t, z0,
+ svrintp_f32_x4 (z4),
+ svrintp (z4))
+
+/*
+** rintp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintp {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (rintp_z4_z18, svfloat32x4_t, z4,
+ svrintp_f32_x4 (z18),
+ svrintp (z18))
+
+/*
+** rintp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** frintp {z[^\n]+}, {z.*}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintp_z18_z23, svfloat32x4_t, z18,
+ svrintp_f32_x4 (z23),
+ svrintp (z23))
+
+/*
+** rintp_z23_z28:
+** frintp [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rintp_z23_z28, svfloat32x4_t, z23,
+ svrintp_f32_x4 (z28),
+ svrintp (z28))
+
+/*
+** rintp_z28_z0:
+** frintp {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rintp_z28_z0, svfloat32x4_t, z28,
+ svrintp_f32_x4 (z0),
+ svrintp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
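+/* For the _single forms, the single vector operand (the shift amount) must
+   be in z0-z15 (compare the z15 and z16 tests below); the tuple operand
+   follows the same alignment rule as in the other multi-vector tests.  */
+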
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint16x2_t, z0,
+ svrshl_s16_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint16x2_t, z0,
+ svrshl_s16_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.h - z29\.h}
+** |
+** srshl [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint16x2_t, z0,
+ svrshl_s16_x2 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** srshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint16x2_t, z18,
+ svrshl_s16_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint16x2_t, z23,
+ svrshl_s16_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint16x2_t, z28,
+ svrshl_s16_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** srshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint16x2_t, z0,
+ svrshl_s16_x2 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** srshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint16x2_t, z4,
+ svrshl_s16_x2 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint16x2_t, svint16_t, z24,
+ svrshl_single_s16_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** srshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint16x2_t, svint16_t, z24,
+ svrshl_single_s16_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** srshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint16x2_t, svint16_t, z24,
+ svrshl_single_s16_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint16x2_t, svint16_t, z1,
+ svrshl_single_s16_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint16x2_t, svint16_t, z1,
+ svrshl_single_s16_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** srshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint16x2_t, svint16_t, z18,
+ svrshl_single_s16_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint16x2_t, svint16_t,
+ z0_res = svrshl_single_s16_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint16x2_t, svint16_t,
+ z0 = svrshl_single_s16_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint16x2_t, svint16_t, z24,
+ svrshl_single_s16_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint16x4_t, z0,
+ svrshl_s16_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint16x4_t, z0,
+ svrshl_s16_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.h - z31\.h}
+** |
+** srshl [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint16x4_t, z0,
+ svrshl_s16_x4 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint16x4_t, z18,
+ svrshl_s16_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z28, svint16x4_t, z23,
+ svrshl_s16_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint16x4_t, z28,
+ svrshl_s16_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** srshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint16x4_t, z0,
+ svrshl_s16_x4 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** srshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint16x4_t, z4,
+ svrshl_s16_x4 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint16x4_t, svint16_t, z24,
+ svrshl_single_s16_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** srshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint16x4_t, svint16_t, z24,
+ svrshl_single_s16_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint16x4_t, svint16_t, z24,
+ svrshl_single_s16_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint16x4_t, svint16_t, z1,
+ svrshl_single_s16_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint16x4_t, svint16_t, z1,
+ svrshl_single_s16_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint16x4_t, svint16_t, z18,
+ svrshl_single_s16_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint16x4_t, svint16_t,
+ z0_res = svrshl_single_s16_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint16x4_t, svint16_t,
+ z0 = svrshl_single_s16_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint16x4_t, svint16_t, z24,
+ svrshl_single_s16_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint32x2_t, z0,
+ svrshl_s32_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint32x2_t, z0,
+ svrshl_s32_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.s - z29\.s}
+** |
+** srshl [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint32x2_t, z0,
+ svrshl_s32_x2 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** srshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint32x2_t, z18,
+ svrshl_s32_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint32x2_t, z23,
+ svrshl_s32_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint32x2_t, z28,
+ svrshl_s32_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** srshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint32x2_t, z0,
+ svrshl_s32_x2 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** srshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint32x2_t, z4,
+ svrshl_s32_x2 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint32x2_t, svint32_t, z24,
+ svrshl_single_s32_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** srshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint32x2_t, svint32_t, z24,
+ svrshl_single_s32_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint32x2_t, svint32_t, z24,
+ svrshl_single_s32_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint32x2_t, svint32_t, z1,
+ svrshl_single_s32_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint32x2_t, svint32_t, z1,
+ svrshl_single_s32_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** srshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint32x2_t, svint32_t, z18,
+ svrshl_single_s32_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint32x2_t, svint32_t,
+ z0_res = svrshl_single_s32_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint32x2_t, svint32_t,
+ z0 = svrshl_single_s32_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint32x2_t, svint32_t, z24,
+ svrshl_single_s32_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint32x4_t, z0,
+ svrshl_s32_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint32x4_t, z0,
+ svrshl_s32_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.s - z31\.s}
+** |
+** srshl [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint32x4_t, z0,
+ svrshl_s32_x4 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint32x4_t, z18,
+ svrshl_s32_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z28, svint32x4_t, z23,
+ svrshl_s32_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint32x4_t, z28,
+ svrshl_s32_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint32x4_t, z0,
+ svrshl_s32_x4 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** srshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint32x4_t, z4,
+ svrshl_s32_x4 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint32x4_t, svint32_t, z24,
+ svrshl_single_s32_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** srshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint32x4_t, svint32_t, z24,
+ svrshl_single_s32_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint32x4_t, svint32_t, z24,
+ svrshl_single_s32_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint32x4_t, svint32_t, z1,
+ svrshl_single_s32_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint32x4_t, svint32_t, z1,
+ svrshl_single_s32_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint32x4_t, svint32_t, z18,
+ svrshl_single_s32_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint32x4_t, svint32_t,
+ z0_res = svrshl_single_s32_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint32x4_t, svint32_t,
+ z0 = svrshl_single_s32_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint32x4_t, svint32_t, z24,
+ svrshl_single_s32_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint64x2_t, z0,
+ svrshl_s64_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint64x2_t, z0,
+ svrshl_s64_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.d - z29\.d}
+** |
+** srshl [^\n]+, {z28\.d - z29\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint64x2_t, z0,
+ svrshl_s64_x2 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** srshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint64x2_t, z18,
+ svrshl_s64_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint64x2_t, z23,
+ svrshl_s64_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint64x2_t, z28,
+ svrshl_s64_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint64x2_t, z0,
+ svrshl_s64_x2 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** srshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint64x2_t, z4,
+ svrshl_s64_x2 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint64x2_t, svint64_t, z24,
+ svrshl_single_s64_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** srshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint64x2_t, svint64_t, z24,
+ svrshl_single_s64_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint64x2_t, svint64_t, z24,
+ svrshl_single_s64_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint64x2_t, svint64_t, z1,
+ svrshl_single_s64_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint64x2_t, svint64_t, z1,
+ svrshl_single_s64_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** srshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint64x2_t, svint64_t, z18,
+ svrshl_single_s64_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint64x2_t, svint64_t,
+ z0_res = svrshl_single_s64_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint64x2_t, svint64_t,
+ z0 = svrshl_single_s64_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint64x2_t, svint64_t, z24,
+ svrshl_single_s64_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint64x4_t, z0,
+ svrshl_s64_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint64x4_t, z0,
+ svrshl_s64_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.d - z31\.d}
+** |
+** srshl [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint64x4_t, z0,
+ svrshl_s64_x4 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint64x4_t, z18,
+ svrshl_s64_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z28, svint64x4_t, z23,
+ svrshl_s64_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint64x4_t, z28,
+ svrshl_s64_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint64x4_t, z0,
+ svrshl_s64_x4 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** srshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint64x4_t, z4,
+ svrshl_s64_x4 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint64x4_t, svint64_t, z24,
+ svrshl_single_s64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** srshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint64x4_t, svint64_t, z24,
+ svrshl_single_s64_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint64x4_t, svint64_t, z24,
+ svrshl_single_s64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint64x4_t, svint64_t, z1,
+ svrshl_single_s64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint64x4_t, svint64_t, z1,
+ svrshl_single_s64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint64x4_t, svint64_t, z18,
+ svrshl_single_s64_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint64x4_t, svint64_t,
+ z0_res = svrshl_single_s64_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint64x4_t, svint64_t,
+ z0 = svrshl_single_s64_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint64x4_t, svint64_t, z24,
+ svrshl_single_s64_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint8x2_t, z0,
+ svrshl_s8_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint8x2_t, z0,
+ svrshl_s8_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.b - z29\.b}
+** |
+** srshl [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint8x2_t, z0,
+ svrshl_s8_x2 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** srshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint8x2_t, z18,
+ svrshl_s8_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint8x2_t, z23,
+ svrshl_s8_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint8x2_t, z28,
+ svrshl_s8_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint8x2_t, z0,
+ svrshl_s8_x2 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** |
+** srshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint8x2_t, z4,
+ svrshl_s8_x2 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint8x2_t, svint8_t, z24,
+ svrshl_single_s8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** srshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint8x2_t, svint8_t, z24,
+ svrshl_single_s8_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint8x2_t, svint8_t, z24,
+ svrshl_single_s8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint8x2_t, svint8_t, z1,
+ svrshl_single_s8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint8x2_t, svint8_t, z1,
+ svrshl_single_s8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** srshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint8x2_t, svint8_t, z18,
+ svrshl_single_s8_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint8x2_t, svint8_t,
+ z0_res = svrshl_single_s8_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint8x2_t, svint8_t,
+ z0 = svrshl_single_s8_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint8x2_t, svint8_t, z24,
+ svrshl_single_s8_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint8x4_t, z0,
+ svrshl_s8_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint8x4_t, z0,
+ svrshl_s8_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.b - z31\.b}
+** |
+** srshl [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint8x4_t, z0,
+ svrshl_s8_x4 (z4, z28),
+ svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint8x4_t, z18,
+ svrshl_s8_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (rshl_z23_z23_z28, svint8x4_t, z23,
+ svrshl_s8_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z0:
+** srshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint8x4_t, z28,
+ svrshl_s8_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint8x4_t, z0,
+ svrshl_s8_x4 (z0, z18),
+ svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** srshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint8x4_t, z4,
+ svrshl_s8_x4 (z4, z23),
+ svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint8x4_t, svint8_t, z24,
+ svrshl_single_s8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** srshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint8x4_t, svint8_t, z24,
+ svrshl_single_s8_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint8x4_t, svint8_t, z24,
+ svrshl_single_s8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint8x4_t, svint8_t, z1,
+ svrshl_single_s8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint8x4_t, svint8_t, z1,
+ svrshl_single_s8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** srshl [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint8x4_t, svint8_t, z18,
+ svrshl_single_s8_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint8x4_t, svint8_t,
+ z0_res = svrshl_single_s8_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint8x4_t, svint8_t,
+ z0 = svrshl_single_s8_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint8x4_t, svint8_t, z24,
+ svrshl_single_s8_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint16x2_t, svint16x2_t, z0,
+ svrshl_u16_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint16x2_t, svuint16x2_t, z4,
+ svrshl_u16_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.h - z5\.h}
+** |
+** urshl [^\n]+, {z4\.h - z5\.h}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z28_z4, svuint16x2_t, svint16x2_t, z0,
+ svrshl_u16_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z18_z18_z4:
+** urshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint16x2_t, svint16x2_t, z18,
+ svrshl_u16_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z18, svint16x2_t, svuint16x2_t, z23,
+ svrshl_u16_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint16x2_t, svint16x2_t, z28,
+ svrshl_u16_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** urshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint16x2_t, svuint16x2_t, z4,
+ svrshl_u16_x2 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+
+** |
+** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z23, svuint16x2_t, svint16x2_t, z28,
+ svrshl_u16_x2 (z28, z23),
+ svrshl (z28, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint16x2_t, svint16_t, z24,
+ svrshl_single_u16_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint16x2_t, svint16_t, z24,
+ svrshl_single_u16_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint16x2_t, svint16_t, z24,
+ svrshl_single_u16_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint16x2_t, svint16_t, z1,
+ svrshl_single_u16_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint16x2_t, svint16_t, z1,
+ svrshl_single_u16_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** urshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint16x2_t, svint16_t, z18,
+ svrshl_single_u16_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint16x2_t, svint16_t,
+ z0_res = svrshl_single_u16_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint16x2_t, svint16_t,
+ z0 = svrshl_single_u16_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint16x2_t, svint16_t, z24,
+ svrshl_single_u16_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint16x4_t, svint16x4_t, z0,
+ svrshl_u16_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint16x4_t, svuint16x4_t, z4,
+ svrshl_u16_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint16x4_t, svint16x4_t, z18,
+ svrshl_u16_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint16x4_t, svuint16x4_t, z23,
+ svrshl_u16_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint16x4_t, svint16x4_t, z28,
+ svrshl_u16_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint16x4_t, svuint16x4_t, z4,
+ svrshl_u16_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint16x4_t, svint16x4_t, z0,
+ svrshl_u16_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint16x4_t, svint16_t, z24,
+ svrshl_single_u16_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** urshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint16x4_t, svint16_t, z24,
+ svrshl_single_u16_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint16x4_t, svint16_t, z24,
+ svrshl_single_u16_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint16x4_t, svint16_t, z1,
+ svrshl_single_u16_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint16x4_t, svint16_t, z1,
+ svrshl_single_u16_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint16x4_t, svint16_t, z18,
+ svrshl_single_u16_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint16x4_t, svint16_t,
+ z0_res = svrshl_single_u16_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint16x4_t, svint16_t,
+ z0 = svrshl_single_u16_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint16x4_t, svint16_t, z24,
+ svrshl_single_u16_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint32x2_t, svint32x2_t, z0,
+ svrshl_u32_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint32x2_t, svuint32x2_t, z4,
+ svrshl_u32_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.s - z5\.s}
+** |
+** urshl [^\n]+, {z4\.s - z5\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z28_z4, svuint32x2_t, svint32x2_t, z0,
+ svrshl_u32_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z18_z18_z4:
+** urshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint32x2_t, svint32x2_t, z18,
+ svrshl_u32_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z18, svint32x2_t, svuint32x2_t, z23,
+ svrshl_u32_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint32x2_t, svint32x2_t, z28,
+ svrshl_u32_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** urshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint32x2_t, svuint32x2_t, z4,
+ svrshl_u32_x2 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+
+** |
+** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z23, svuint32x2_t, svint32x2_t, z28,
+ svrshl_u32_x2 (z28, z23),
+ svrshl (z28, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint32x2_t, svint32_t, z24,
+ svrshl_single_u32_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint32x2_t, svint32_t, z24,
+ svrshl_single_u32_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint32x2_t, svint32_t, z24,
+ svrshl_single_u32_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint32x2_t, svint32_t, z1,
+ svrshl_single_u32_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint32x2_t, svint32_t, z1,
+ svrshl_single_u32_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** urshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint32x2_t, svint32_t, z18,
+ svrshl_single_u32_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint32x2_t, svint32_t,
+ z0_res = svrshl_single_u32_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint32x2_t, svint32_t,
+ z0 = svrshl_single_u32_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint32x2_t, svint32_t, z24,
+ svrshl_single_u32_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint32x4_t, svint32x4_t, z0,
+ svrshl_u32_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint32x4_t, svuint32x4_t, z4,
+ svrshl_u32_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint32x4_t, svint32x4_t, z18,
+ svrshl_u32_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint32x4_t, svuint32x4_t, z23,
+ svrshl_u32_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint32x4_t, svint32x4_t, z28,
+ svrshl_u32_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint32x4_t, svuint32x4_t, z4,
+ svrshl_u32_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint32x4_t, svint32x4_t, z0,
+ svrshl_u32_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint32x4_t, svint32_t, z24,
+ svrshl_single_u32_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** urshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint32x4_t, svint32_t, z24,
+ svrshl_single_u32_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint32x4_t, svint32_t, z24,
+ svrshl_single_u32_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint32x4_t, svint32_t, z1,
+ svrshl_single_u32_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint32x4_t, svint32_t, z1,
+ svrshl_single_u32_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint32x4_t, svint32_t, z18,
+ svrshl_single_u32_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint32x4_t, svint32_t,
+ z0_res = svrshl_single_u32_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint32x4_t, svint32_t,
+ z0 = svrshl_single_u32_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint32x4_t, svint32_t, z24,
+ svrshl_single_u32_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint64x2_t, svint64x2_t, z0,
+ svrshl_u64_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint64x2_t, svuint64x2_t, z4,
+ svrshl_u64_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.d - z5\.d}
+** |
+** urshl [^\n]+, {z4\.d - z5\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z28_z4, svuint64x2_t, svint64x2_t, z0,
+ svrshl_u64_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z18_z18_z4:
+** urshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint64x2_t, svint64x2_t, z18,
+ svrshl_u64_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z18, svint64x2_t, svuint64x2_t, z23,
+ svrshl_u64_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint64x2_t, svint64x2_t, z28,
+ svrshl_u64_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** urshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint64x2_t, svuint64x2_t, z4,
+ svrshl_u64_x2 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+
+** |
+** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z23, svuint64x2_t, svint64x2_t, z28,
+ svrshl_u64_x2 (z28, z23),
+ svrshl (z28, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint64x2_t, svint64_t, z24,
+ svrshl_single_u64_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint64x2_t, svint64_t, z24,
+ svrshl_single_u64_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint64x2_t, svint64_t, z24,
+ svrshl_single_u64_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint64x2_t, svint64_t, z1,
+ svrshl_single_u64_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint64x2_t, svint64_t, z1,
+ svrshl_single_u64_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** urshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint64x2_t, svint64_t, z18,
+ svrshl_single_u64_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint64x2_t, svint64_t,
+ z0_res = svrshl_single_u64_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint64x2_t, svint64_t,
+ z0 = svrshl_single_u64_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint64x2_t, svint64_t, z24,
+ svrshl_single_u64_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint64x4_t, svint64x4_t, z0,
+ svrshl_u64_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint64x4_t, svuint64x4_t, z4,
+ svrshl_u64_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint64x4_t, svint64x4_t, z18,
+ svrshl_u64_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint64x4_t, svuint64x4_t, z23,
+ svrshl_u64_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint64x4_t, svint64x4_t, z28,
+ svrshl_u64_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** urshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint64x4_t, svuint64x4_t, z4,
+ svrshl_u64_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** urshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint64x4_t, svint64x4_t, z0,
+ svrshl_u64_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** urshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint64x4_t, svint64_t, z1,
+ svrshl_single_u64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint64x4_t, svint64_t, z1,
+ svrshl_single_u64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint64x4_t, svint64_t, z18,
+ svrshl_single_u64_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint64x4_t, svint64_t,
+ z0_res = svrshl_single_u64_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint64x4_t, svint64_t,
+ z0 = svrshl_single_u64_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint8x2_t, svint8x2_t, z0,
+ svrshl_u8_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint8x2_t, svuint8x2_t, z4,
+ svrshl_u8_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.b - z5\.b}
+** |
+** urshl [^\n]+, {z4\.b - z5\.b}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z28_z4, svuint8x2_t, svint8x2_t, z0,
+ svrshl_u8_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z18_z18_z4:
+** urshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint8x2_t, svint8x2_t, z18,
+ svrshl_u8_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z18, svint8x2_t, svuint8x2_t, z23,
+ svrshl_u8_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint8x2_t, svint8x2_t, z28,
+ svrshl_u8_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** urshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint8x2_t, svuint8x2_t, z4,
+ svrshl_u8_x2 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, [^\n]+
+** |
+** urshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z23, svuint8x2_t, svint8x2_t, z28,
+ svrshl_u8_x2 (z28, z23),
+ svrshl (z28, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+** urshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** urshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint8x2_t, svint8_t, z1,
+ svrshl_single_u8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint8x2_t, svint8_t, z1,
+ svrshl_single_u8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** urshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint8x2_t, svint8_t, z18,
+ svrshl_single_u8_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint8x2_t, svint8_t,
+ z0_res = svrshl_single_u8_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint8x2_t, svint8_t,
+ z0 = svrshl_single_u8_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+** urshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint8x4_t, svint8x4_t, z0,
+ svrshl_u8_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+** urshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint8x4_t, svuint8x4_t, z4,
+ svrshl_u8_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint8x4_t, svint8x4_t, z18,
+ svrshl_u8_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint8x4_t, svuint8x4_t, z23,
+ svrshl_u8_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+** urshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint8x4_t, svint8x4_t, z28,
+ svrshl_u8_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+** urshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint8x4_t, svuint8x4_t, z4,
+ svrshl_u8_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+** urshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint8x4_t, svint8x4_t, z0,
+ svrshl_u8_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+** urshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+** urshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+** urshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint8x4_t, svint8_t, z1,
+ svrshl_single_u8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint8x4_t, svint8_t, z1,
+ svrshl_single_u8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** urshl [^\n]+, z0\.b
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint8x4_t, svint8_t, z18,
+ svrshl_single_u8_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+** ...
+** urshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint8x4_t, svint8_t,
+ z0_res = svrshl_single_u8_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+** ...
+** urshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint8x4_t, svint8_t,
+ z0 = svrshl_single_u8_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** urshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+** ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z24, z16),
+ svrshl (z24, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
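+/* The multi-vector SEL instruction can only encode governing
+   predicate-as-counter registers pn8-pn15, so selections governed by
+   pn0-pn7 first copy the predicate into that range, as the MOVs in the
+   first two expectations show.  */
+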
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svbfloat16x2_t, z4,
+ svsel_bf16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svbfloat16x2_t, z18,
+ svsel_bf16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svbfloat16x2_t, z18,
+ svsel_bf16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svbfloat16x2_t, z23,
+ svsel_bf16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svbfloat16x4_t, z4,
+ svsel_bf16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svbfloat16x4_t, z18,
+ svsel_bf16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svbfloat16x4_t, z18,
+ svsel_bf16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svbfloat16x4_t, z23,
+ svsel_bf16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat16x2_t, z4,
+ svsel_f16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat16x2_t, z18,
+ svsel_f16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat16x2_t, z18,
+ svsel_f16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat16x2_t, z23,
+ svsel_f16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat16x4_t, z4,
+ svsel_f16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat16x4_t, z18,
+ svsel_f16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat16x4_t, z18,
+ svsel_f16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat16x4_t, z23,
+ svsel_f16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat32x2_t, z4,
+ svsel_f32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat32x2_t, z18,
+ svsel_f32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat32x2_t, z18,
+ svsel_f32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat32x2_t, z23,
+ svsel_f32_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat32x4_t, z4,
+ svsel_f32_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat32x4_t, z18,
+ svsel_f32_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat32x4_t, z18,
+ svsel_f32_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat32x4_t, z23,
+ svsel_f32_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat64x2_t, z4,
+ svsel_f64_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat64x2_t, z18,
+ svsel_f64_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat64x2_t, z18,
+ svsel_f64_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat64x2_t, z23,
+ svsel_f64_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat64x4_t, z4,
+ svsel_f64_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat64x4_t, z18,
+ svsel_f64_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat64x4_t, z18,
+ svsel_f64_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat64x4_t, z23,
+ svsel_f64_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint16x2_t, z0,
+ svsel_s16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint16x2_t, z0,
+ svsel_s16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint16x2_t, z0,
+ svsel_s16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint16x2_t, z4,
+ svsel_s16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint16x2_t, z18,
+ svsel_s16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint16x2_t, z18,
+ svsel_s16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint16x2_t, z23,
+ svsel_s16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint16x2_t, z0,
+ svsel_s16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint16x2_t, z0,
+ svsel_s16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint16x4_t, z0,
+ svsel_s16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint16x4_t, z0,
+ svsel_s16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint16x4_t, z0,
+ svsel_s16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint16x4_t, z4,
+ svsel_s16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint16x4_t, z18,
+ svsel_s16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint16x4_t, z18,
+ svsel_s16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint16x4_t, z23,
+ svsel_s16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint32x2_t, z0,
+ svsel_s32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint32x2_t, z0,
+ svsel_s32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint32x2_t, z0,
+ svsel_s32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint32x2_t, z4,
+ svsel_s32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint32x2_t, z18,
+ svsel_s32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint32x2_t, z18,
+ svsel_s32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint32x2_t, z23,
+ svsel_s32_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint32x2_t, z0,
+ svsel_s32_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint32x2_t, z0,
+ svsel_s32_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint32x4_t, z0,
+ svsel_s32_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint32x4_t, z0,
+ svsel_s32_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint32x4_t, z0,
+ svsel_s32_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint32x4_t, z4,
+ svsel_s32_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint32x4_t, z18,
+ svsel_s32_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint32x4_t, z18,
+ svsel_s32_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint32x4_t, z23,
+ svsel_s32_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint64x2_t, z0,
+ svsel_s64_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint64x2_t, z0,
+ svsel_s64_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint64x2_t, z0,
+ svsel_s64_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint64x2_t, z4,
+ svsel_s64_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint64x2_t, z18,
+ svsel_s64_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint64x2_t, z18,
+ svsel_s64_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint64x2_t, z23,
+ svsel_s64_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint64x2_t, z0,
+ svsel_s64_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint64x2_t, z0,
+ svsel_s64_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint64x4_t, z0,
+ svsel_s64_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint64x4_t, z0,
+ svsel_s64_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint64x4_t, z0,
+ svsel_s64_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint64x4_t, z4,
+ svsel_s64_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint64x4_t, z18,
+ svsel_s64_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint64x4_t, z18,
+ svsel_s64_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint64x4_t, z23,
+ svsel_s64_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint8x2_t, z0,
+ svsel_s8_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint8x2_t, z0,
+ svsel_s8_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint8x2_t, z0,
+ svsel_s8_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint8x2_t, z4,
+ svsel_s8_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint8x2_t, z18,
+ svsel_s8_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint8x2_t, z18,
+ svsel_s8_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint8x2_t, z23,
+ svsel_s8_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint8x2_t, z0,
+ svsel_s8_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint8x2_t, z0,
+ svsel_s8_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint8x4_t, z0,
+ svsel_s8_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint8x4_t, z0,
+ svsel_s8_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint8x4_t, z0,
+ svsel_s8_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint8x4_t, z4,
+ svsel_s8_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint8x4_t, z18,
+ svsel_s8_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint8x4_t, z18,
+ svsel_s8_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint8x4_t, z23,
+ svsel_s8_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint16x2_t, z0,
+ svsel_u16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint16x2_t, z0,
+ svsel_u16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint16x2_t, z0,
+ svsel_u16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint16x2_t, z4,
+ svsel_u16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint16x2_t, z18,
+ svsel_u16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint16x2_t, z18,
+ svsel_u16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint16x2_t, z23,
+ svsel_u16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svuint16x2_t, z0,
+ svsel_u16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svuint16x2_t, z0,
+ svsel_u16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint16x4_t, z0,
+ svsel_u16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint16x4_t, z0,
+ svsel_u16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint16x4_t, z0,
+ svsel_u16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint16x4_t, z4,
+ svsel_u16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint16x4_t, z18,
+ svsel_u16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint16x4_t, z18,
+ svsel_u16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint16x4_t, z23,
+ svsel_u16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint32x2_t, z0,
+ svsel_u32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint32x2_t, z0,
+ svsel_u32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint32x2_t, z0,
+ svsel_u32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint32x2_t, z4,
+ svsel_u32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint32x2_t, z18,
+ svsel_u32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint32x2_t, z18,
+ svsel_u32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint32x2_t, z23,
+ svsel_u32_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svuint32x2_t, z0,
+ svsel_u32_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svuint32x2_t, z0,
+ svsel_u32_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint32x4_t, z0,
+ svsel_u32_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint32x4_t, z0,
+ svsel_u32_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint32x4_t, z0,
+ svsel_u32_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint32x4_t, z4,
+ svsel_u32_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint32x4_t, z18,
+ svsel_u32_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint32x4_t, z18,
+ svsel_u32_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint32x4_t, z23,
+ svsel_u32_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint64x2_t, z0,
+ svsel_u64_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint64x2_t, z0,
+ svsel_u64_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint64x2_t, z0,
+ svsel_u64_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint64x2_t, z4,
+ svsel_u64_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint64x2_t, z18,
+ svsel_u64_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint64x2_t, z18,
+ svsel_u64_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint64x2_t, z23,
+ svsel_u64_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svuint64x2_t, z0,
+ svsel_u64_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svuint64x2_t, z0,
+ svsel_u64_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint64x4_t, z0,
+ svsel_u64_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint64x4_t, z0,
+ svsel_u64_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint64x4_t, z0,
+ svsel_u64_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint64x4_t, z4,
+ svsel_u64_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint64x4_t, z18,
+ svsel_u64_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint64x4_t, z18,
+ svsel_u64_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint64x4_t, z23,
+ svsel_u64_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint8x2_t, z0,
+ svsel_u8_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint8x2_t, z0,
+ svsel_u8_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint8x2_t, z0,
+ svsel_u8_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** sel {z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint8x2_t, z4,
+ svsel_u8_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint8x2_t, z18,
+ svsel_u8_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** sel {z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint8x2_t, z18,
+ svsel_u8_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** sel [^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint8x2_t, z23,
+ svsel_u8_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svuint8x2_t, z0,
+ svsel_u8_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sel {z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svuint8x2_t, z0,
+ svsel_u8_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+** mov p([0-9]+)\.b, p0\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint8x4_t, z0,
+ svsel_u8_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+** mov p([0-9]+)\.b, p7\.b
+** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint8x4_t, z0,
+ svsel_u8_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+** sel {z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint8x4_t, z0,
+ svsel_u8_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint8x4_t, z4,
+ svsel_u8_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+** sel {[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint8x4_t, z18,
+ svsel_u8_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel {[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint8x4_t, z18,
+ svsel_u8_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sel [^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint8x4_t, z23,
+ svsel_u8_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_14, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m18, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m36, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_14, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m18, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m36, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_14, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m18, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_f64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m36, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_14, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m18, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_14, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m18, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m36, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_28, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m36, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
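+/* For a runtime vnum either sequence is acceptable: fold the scaled count
+   into the base with MADD, or compute the byte offset with MUL and use the
+   register-offset addressing mode.  */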
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
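+
+/* TEST_STORE_COUNT is presumably provided by test_sme2_acle.h in the same
+   style as the SVE ACLE asm harness: each invocation is expected to define
+   one function, using the explicitly-suffixed intrinsic call when CHECK_ASM
+   is defined and the overloaded call otherwise.  check-function-bodies then
+   matches each "**" block against the assembly of the same-named function.  */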
+
+/*
+** st1_s32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_14, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
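+/* The immediate form of these multi-vector stores takes a multiple of the
+   register count in the range [-8*count, 7*count] vector lengths, i.e.
+   [-16, 14] for a pair, so an offset of 16 VLs is folded into the base
+   instead.  */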
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m18, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_14, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m18, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m36, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_28, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m36, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_14, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m18, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_14, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m18, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m36, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_28, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m36, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_14, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m18, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_14, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m18, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_s8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m36, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_28, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m36, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_14, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m18, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m36, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_14, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m18, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m36, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_14, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m18, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m36, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_14, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m18, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** st1_u8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m36, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
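+/* Tests for svstnt1_f16_x2 and svstnt1_vnum_f16_x2 (plus their overloaded
+   forms): each function body is matched against the expected STNT1H
+   two-vector non-temporal store and addressing mode.  */
+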
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
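+/* Tests for svstnt1_f16_x4 and svstnt1_vnum_f16_x4 (plus their overloaded
+   forms): each function body is matched against the expected STNT1H
+   four-vector non-temporal store and addressing mode.  */
+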
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
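+/* Tests for svstnt1_f32_x2 and svstnt1_vnum_f32_x2 (plus their overloaded
+   forms): each function body is matched against the expected STNT1W
+   two-vector non-temporal store and addressing mode.  */
+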
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
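+/* Tests for svstnt1_f32_x4 and svstnt1_vnum_f32_x4 (plus their overloaded
+   forms): each function body is matched against the expected STNT1W
+   four-vector non-temporal store and addressing mode.  */
+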
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
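+/* Tests for svstnt1_f64_x2 and svstnt1_vnum_f64_x2 (plus their overloaded
+   forms): each function body is matched against the expected STNT1D
+   two-vector non-temporal store and addressing mode.  */
+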
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
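+/* Tests for svstnt1_f64_x4 and svstnt1_vnum_f64_x4 (plus their overloaded
+   forms): each function body is matched against the expected STNT1D
+   four-vector non-temporal store and addressing mode.  */
+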
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
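+/* Tests for svstnt1_s16_x2 and svstnt1_vnum_s16_x2 (plus their overloaded
+   forms): each function body is matched against the expected STNT1H
+   two-vector non-temporal store and addressing mode.  */
+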
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_14, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m18, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_14, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m36, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_28, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_14, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m18, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_14, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m36, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_28, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_14, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m18, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_14, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m36, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_28, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_14, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m18, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_14, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m36, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_28, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sme2_acle.h"
+
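+/* Code-generation tests for svstnt1_u8_x4 and svstnt1_vnum_u8_x4, which
+   store four uint8_t vectors non-temporally under a predicate-as-counter;
+   the comment above each test gives the expected assembly.  */
+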
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define SHARED_ZT0
+#include "test_sme2_acle.h"
+
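+/* Code-generation tests for svstr_zt, which stores the ZT0 table register
+   to memory; the offset cases check the expected address arithmetic.  */
+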
+/*
+** str_zt0_x0:
+** str zt0, \[x0\]
+** ret
+*/
+PROTO (str_zt0_x0, void, (char *x0)) { svstr_zt (0, x0); }
+
+/*
+** str_zt0_x0p1:
+** add (x[0-9]+), x0, #?1
+** str zt0, \[\1\]
+** ret
+*/
+PROTO (str_zt0_x0p1, void, (char *x0)) { svstr_zt (0, x0 + 1); }
+
+/*
+** str_zt0_x0p64:
+** add (x[0-9]+), x0, #?64
+** str zt0, \[\1\]
+** ret
+*/
+PROTO (str_zt0_x0p64, void, (char *x0)) { svstr_zt (0, x0 + 64); }
+
+/*
+** str_zt0_x0_vl1:
+** incb x0
+** str zt0, \[x0\]
+** ret
+*/
+PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntb ()); }
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
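+/* Code-generation tests for svsub_write_za32_s32_vg1x2 and its _single_
+   form, covering slice offsets and register selection for the vgx2 SUB
+   forms that write to ZA.S.  */
+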
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (0, z0, z0),
+ svsub_write_za32_vg1x2 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w0, z0, z0),
+ svsub_write_za32_vg1x2 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8, z0, z4),
+ svsub_write_za32_vg1x2 (w8, z0, z4))
+
+/*
+** sub_write_w8_z4_z18:
+** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z18, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8, z4, z18),
+ svsub_write_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z23_z0:
+** ...
+** sub za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z23_z0, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8, z23, z0),
+ svsub_write_za32_vg1x2 (w8, z23, z0))
+
+/*
+** sub_write_w8_z18_z23:
+** ...
+** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z23, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8, z18, z23),
+ svsub_write_za32_vg1x2 (w8, z18, z23))
+
+/*
+** sub_write_w8_z4_z28:
+** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z28, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8, z4, z28),
+ svsub_write_za32_vg1x2 (w8, z4, z28))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8 + 7, z4, z0),
+ svsub_write_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8 + 8, z4, z4),
+ svsub_write_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svint32x2_t,
+ svsub_write_za32_s32_vg1x2 (w8 - 1, z4, z0),
+ svsub_write_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (0, z1, z0),
+ svsub_write_za32_vg1x2 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w0, z1, z0),
+ svsub_write_za32_vg1x2 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w8, z1, z0),
+ svsub_write_za32_vg1x2 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w8 + 7, z1, z0),
+ svsub_write_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w8 + 8, z1, z0),
+ svsub_write_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w0 - 1, z1, z0),
+ svsub_write_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w8, z0, z15),
+ svsub_write_za32_vg1x2 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint32x2_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x2 (w8, z20, z16),
+ svsub_write_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
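+/* As for the vgx2 tests, but for svsub_write_za32_s32_vg1x4 and its
+   _single_ form, operating on groups of four vectors.  */
+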
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (0, z0, z0),
+ svsub_write_za32_vg1x4 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w0, z0, z0),
+ svsub_write_za32_vg1x4 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8, z0, z4),
+ svsub_write_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z0_z18:
+** ...
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z18, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8, z0, z18),
+ svsub_write_za32_vg1x4 (w8, z0, z18))
+
+/*
+** sub_write_w8_z18_z28:
+** ...
+** sub za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z28, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8, z18, z28),
+ svsub_write_za32_vg1x4 (w8, z18, z28))
+
+/*
+** sub_write_w8_z28_z23:
+** ...
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z28_z23, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8, z28, z23),
+ svsub_write_za32_vg1x4 (w8, z28, z23))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8 + 7, z4, z0),
+ svsub_write_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8 + 8, z4, z4),
+ svsub_write_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svint32x4_t,
+ svsub_write_za32_s32_vg1x4 (w8 - 1, z4, z0),
+ svsub_write_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (0, z1, z0),
+ svsub_write_za32_vg1x4 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w0, z1, z0),
+ svsub_write_za32_vg1x4 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w8, z1, z0),
+ svsub_write_za32_vg1x4 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w8 + 7, z1, z0),
+ svsub_write_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w8 + 8, z1, z0),
+ svsub_write_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w0 - 1, z1, z0),
+ svsub_write_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w8, z0, z15),
+ svsub_write_za32_vg1x4 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint32x4_t, svint32_t,
+ svsub_write_single_za32_s32_vg1x4 (w8, z20, z16),
+ svsub_write_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
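+/* Unsigned counterpart of the s32 vgx2 tests: svsub_write_za32_u32_vg1x2
+   and its _single_ form should generate the same SUB sequences.  */
+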
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (0, z0, z0),
+ svsub_write_za32_vg1x2 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w0, z0, z0),
+ svsub_write_za32_vg1x2 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8, z0, z4),
+ svsub_write_za32_vg1x2 (w8, z0, z4))
+
+/*
+** sub_write_w8_z4_z18:
+** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z18, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8, z4, z18),
+ svsub_write_za32_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z23_z0:
+** ...
+** sub za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z23_z0, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8, z23, z0),
+ svsub_write_za32_vg1x2 (w8, z23, z0))
+
+/*
+** sub_write_w8_z18_z23:
+** ...
+** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z23, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8, z18, z23),
+ svsub_write_za32_vg1x2 (w8, z18, z23))
+
+/*
+** sub_write_w8_z4_z28:
+** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z28, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8, z4, z28),
+ svsub_write_za32_vg1x2 (w8, z4, z28))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8 + 7, z4, z0),
+ svsub_write_za32_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8 + 8, z4, z4),
+ svsub_write_za32_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint32x2_t,
+ svsub_write_za32_u32_vg1x2 (w8 - 1, z4, z0),
+ svsub_write_za32_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (0, z1, z0),
+ svsub_write_za32_vg1x2 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w0, z1, z0),
+ svsub_write_za32_vg1x2 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w8, z1, z0),
+ svsub_write_za32_vg1x2 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w8 + 7, z1, z0),
+ svsub_write_za32_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w8 + 8, z1, z0),
+ svsub_write_za32_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w0 - 1, z1, z0),
+ svsub_write_za32_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w8, z0, z15),
+ svsub_write_za32_vg1x2 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint32x2_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x2 (w8, z20, z16),
+ svsub_write_za32_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
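+/* Unsigned counterpart of the s32 vgx4 tests: svsub_write_za32_u32_vg1x4
+   and its _single_ form.  */
+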
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (0, z0, z0),
+ svsub_write_za32_vg1x4 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w0, z0, z0),
+ svsub_write_za32_vg1x4 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8, z0, z4),
+ svsub_write_za32_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z0_z18:
+** ...
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z18, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8, z0, z18),
+ svsub_write_za32_vg1x4 (w8, z0, z18))
+
+/*
+** sub_write_w8_z18_z28:
+** ...
+** sub za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z28, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8, z18, z28),
+ svsub_write_za32_vg1x4 (w8, z18, z28))
+
+/*
+** sub_write_w8_z28_z23:
+** ...
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z28_z23, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8, z28, z23),
+ svsub_write_za32_vg1x4 (w8, z28, z23))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8 + 7, z4, z0),
+ svsub_write_za32_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8 + 8, z4, z4),
+ svsub_write_za32_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint32x4_t,
+ svsub_write_za32_u32_vg1x4 (w8 - 1, z4, z0),
+ svsub_write_za32_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (0, z1, z0),
+ svsub_write_za32_vg1x4 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w0, z1, z0),
+ svsub_write_za32_vg1x4 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w8, z1, z0),
+ svsub_write_za32_vg1x4 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w8 + 7, z1, z0),
+ svsub_write_za32_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w8 + 8, z1, z0),
+ svsub_write_za32_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w0 - 1, z1, z0),
+ svsub_write_za32_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w8, z0, z15),
+ svsub_write_za32_vg1x4 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint32x4_t, svuint32_t,
+ svsub_write_single_za32_u32_vg1x4 (w8, z20, z16),
+ svsub_write_za32_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
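+/* Code-generation tests for svsub_write_za64_s64_vg1x2 and its _single_
+   form; the 64-bit ZA forms require the sme-i16i64 extension enabled
+   above.  */
+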
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (0, z0, z0),
+ svsub_write_za64_vg1x2 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w0, z0, z0),
+ svsub_write_za64_vg1x2 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8, z0, z4),
+ svsub_write_za64_vg1x2 (w8, z0, z4))
+
+/*
+** sub_write_w8_z4_z18:
+** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z18, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8, z4, z18),
+ svsub_write_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z23_z0:
+** ...
+** sub za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z23_z0, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8, z23, z0),
+ svsub_write_za64_vg1x2 (w8, z23, z0))
+
+/*
+** sub_write_w8_z18_z23:
+** ...
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z23, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8, z18, z23),
+ svsub_write_za64_vg1x2 (w8, z18, z23))
+
+/*
+** sub_write_w8_z4_z28:
+** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z28, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8, z4, z28),
+ svsub_write_za64_vg1x2 (w8, z4, z28))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8 + 7, z4, z0),
+ svsub_write_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8 + 8, z4, z4),
+ svsub_write_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svint64x2_t,
+ svsub_write_za64_s64_vg1x2 (w8 - 1, z4, z0),
+ svsub_write_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (0, z1, z0),
+ svsub_write_za64_vg1x2 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w0, z1, z0),
+ svsub_write_za64_vg1x2 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w8, z1, z0),
+ svsub_write_za64_vg1x2 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w8 + 7, z1, z0),
+ svsub_write_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w8 + 8, z1, z0),
+ svsub_write_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w0 - 1, z1, z0),
+ svsub_write_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w8, z0, z15),
+ svsub_write_za64_vg1x2 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint64x2_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x2 (w8, z20, z16),
+ svsub_write_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
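+/* As for the vgx2 tests, but for svsub_write_za64_s64_vg1x4 and its
+   _single_ form.  */
+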
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (0, z0, z0),
+ svsub_write_za64_vg1x4 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w0, z0, z0),
+ svsub_write_za64_vg1x4 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8, z0, z4),
+ svsub_write_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z0_z18:
+** ...
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z18, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8, z0, z18),
+ svsub_write_za64_vg1x4 (w8, z0, z18))
+
+/*
+** sub_write_w8_z18_z28:
+** ...
+** sub za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z28, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8, z18, z28),
+ svsub_write_za64_vg1x4 (w8, z18, z28))
+
+/*
+** sub_write_w8_z28_z23:
+** ...
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z28_z23, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8, z28, z23),
+ svsub_write_za64_vg1x4 (w8, z28, z23))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8 + 7, z4, z0),
+ svsub_write_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8 + 8, z4, z4),
+ svsub_write_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svint64x4_t,
+ svsub_write_za64_s64_vg1x4 (w8 - 1, z4, z0),
+ svsub_write_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (0, z1, z0),
+ svsub_write_za64_vg1x4 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w0, z1, z0),
+ svsub_write_za64_vg1x4 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w8, z1, z0),
+ svsub_write_za64_vg1x4 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w8 + 7, z1, z0),
+ svsub_write_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w8 + 8, z1, z0),
+ svsub_write_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w0 - 1, z1, z0),
+ svsub_write_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w8, z0, z15),
+ svsub_write_za64_vg1x4 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint64x4_t, svint64_t,
+ svsub_write_single_za64_s64_vg1x4 (w8, z20, z16),
+ svsub_write_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
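+/* Unsigned counterpart of the s64 vgx2 tests: svsub_write_za64_u64_vg1x2
+   and its _single_ form.  */
+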
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (0, z0, z0),
+ svsub_write_za64_vg1x2 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w0, z0, z0),
+ svsub_write_za64_vg1x2 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8, z0, z4),
+ svsub_write_za64_vg1x2 (w8, z0, z4))
+
+/*
+** sub_write_w8_z4_z18:
+** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z18, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8, z4, z18),
+ svsub_write_za64_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z23_z0:
+** ...
+** sub za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z23_z0, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8, z23, z0),
+ svsub_write_za64_vg1x2 (w8, z23, z0))
+
+/*
+** sub_write_w8_z18_z23:
+** ...
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z23, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8, z18, z23),
+ svsub_write_za64_vg1x2 (w8, z18, z23))
+
+/*
+** sub_write_w8_z4_z28:
+** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z4_z28, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8, z4, z28),
+ svsub_write_za64_vg1x2 (w8, z4, z28))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8 + 7, z4, z0),
+ svsub_write_za64_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8 + 8, z4, z4),
+ svsub_write_za64_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint64x2_t,
+ svsub_write_za64_u64_vg1x2 (w8 - 1, z4, z0),
+ svsub_write_za64_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (0, z1, z0),
+ svsub_write_za64_vg1x2 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w0, z1, z0),
+ svsub_write_za64_vg1x2 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w8, z1, z0),
+ svsub_write_za64_vg1x2 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w8 + 7, z1, z0),
+ svsub_write_za64_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w8 + 8, z1, z0),
+ svsub_write_za64_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w0 - 1, z1, z0),
+ svsub_write_za64_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w8, z0, z15),
+ svsub_write_za64_vg1x2 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint64x2_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x2 (w8, z20, z16),
+ svsub_write_za64_vg1x2 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
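+/* Unsigned counterpart of the s64 vgx4 tests: svsub_write_za64_u64_vg1x4
+   and its _single_ form.  */
+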
+/*
+** sub_write_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_0_z0_z0, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (0, z0, z0),
+ svsub_write_za64_vg1x4 (0, z0, z0))
+
+/*
+** sub_write_w0_z0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w0_z0_z0, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w0, z0, z0),
+ svsub_write_za64_vg1x4 (w0, z0, z0))
+
+/*
+** sub_write_w8_z0_z4:
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z4, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8, z0, z4),
+ svsub_write_za64_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_write_w8_z0_z18:
+** ...
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z0_z18, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8, z0, z18),
+ svsub_write_za64_vg1x4 (w8, z0, z18))
+
+/*
+** sub_write_w8_z18_z28:
+** ...
+** sub za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z18_z28, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8, z18, z28),
+ svsub_write_za64_vg1x4 (w8, z18, z28))
+
+/*
+** sub_write_w8_z28_z23:
+** ...
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_write_w8_z28_z23, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8, z28, z23),
+ svsub_write_za64_vg1x4 (w8, z28, z23))
+
+/*
+** sub_write_w8p7_z4_z0:
+** sub za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8 + 7, z4, z0),
+ svsub_write_za64_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** sub_write_w8p8_z4_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8 + 8, z4, z4),
+ svsub_write_za64_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** sub_write_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint64x4_t,
+ svsub_write_za64_u64_vg1x4 (w8 - 1, z4, z0),
+ svsub_write_za64_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** sub_write_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (0, z1, z0),
+ svsub_write_za64_vg1x4 (0, z1, z0))
+
+/*
+** sub_write_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w0, z1, z0),
+ svsub_write_za64_vg1x4 (w0, z1, z0))
+
+/*
+** sub_write_single_w8_z1_z0:
+** sub za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w8, z1, z0),
+ svsub_write_za64_vg1x4 (w8, z1, z0))
+
+/*
+** sub_write_single_w8p7_z1_z0:
+** sub za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w8 + 7, z1, z0),
+ svsub_write_za64_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** sub_write_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w8 + 8, z1, z0),
+ svsub_write_za64_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** sub_write_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w0 - 1, z1, z0),
+ svsub_write_za64_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** sub_write_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w8, z0, z15),
+ svsub_write_za64_vg1x4 (w8, z0, z15))
+
+/*
+** sub_write_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sub za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d
+** ret
+*/
+TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint64x4_t, svuint64_t,
+ svsub_write_single_za64_u64_vg1x4 (w8, z20, z16),
+ svsub_write_za64_vg1x4 (w8, z20, z16))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
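+/* Code-generation tests for svsub_za32_f32_vg1x2, which subtracts a pair
+   of float32 vectors from ZA.S vector groups using FSUB.  */
+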
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (0, z0),
+ svsub_za32_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w0, z0),
+ svsub_za32_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w7, z0),
+ svsub_za32_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8, z0),
+ svsub_za32_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w11, z0),
+ svsub_za32_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w12, z0),
+ svsub_za32_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8 + 7, z0),
+ svsub_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8 + 8, z0),
+ svsub_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8 - 1, z0),
+ svsub_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** fsub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8, z18),
+ svsub_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8, z23),
+ svsub_za32_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** fsub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat32x2_t,
+ svsub_za32_f32_vg1x2 (w8, z28),
+ svsub_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
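+/* As for the vgx2 tests, but for svsub_za32_f32_vg1x4, operating on groups
+   of four vectors.  */
+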
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (0, z0),
+ svsub_za32_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w0, z0),
+ svsub_za32_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w7, z0),
+ svsub_za32_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8, z0),
+ svsub_za32_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w11, z0),
+ svsub_za32_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w12, z0),
+ svsub_za32_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8 + 7, z0),
+ svsub_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8 + 8, z0),
+ svsub_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8 - 1, z0),
+ svsub_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** fsub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8, z4),
+ svsub_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8, z18),
+ svsub_za32_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8, z23),
+ svsub_za32_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** fsub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat32x4_t,
+ svsub_za32_f32_vg1x4 (w8, z28),
+ svsub_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
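+/* Code-generation tests for svsub_za32_s32_vg1x2, which subtracts a pair
+   of int32 vectors from ZA.S vector groups using SUB.  */
+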
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (0, z0),
+ svsub_za32_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w0, z0),
+ svsub_za32_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w7, z0),
+ svsub_za32_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8, z0),
+ svsub_za32_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w11, z0),
+ svsub_za32_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w12, z0),
+ svsub_za32_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8 + 7, z0),
+ svsub_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8 + 8, z0),
+ svsub_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8 - 1, z0),
+ svsub_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8, z18),
+ svsub_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correct handling of misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8, z23),
+ svsub_za32_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svint32x2_t,
+ svsub_za32_s32_vg1x2 (w8, z28),
+ svsub_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (0, z0),
+ svsub_za32_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w0, z0),
+ svsub_za32_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w7, z0),
+ svsub_za32_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8, z0),
+ svsub_za32_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w11, z0),
+ svsub_za32_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w12, z0),
+ svsub_za32_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8 + 7, z0),
+ svsub_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8 + 8, z0),
+ svsub_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8 - 1, z0),
+ svsub_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** sub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8, z4),
+ svsub_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8, z18),
+ svsub_za32_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8, z23),
+ svsub_za32_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svint32x4_t,
+ svsub_za32_s32_vg1x4 (w8, z28),
+ svsub_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (0, z0),
+ svsub_za32_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w0, z0),
+ svsub_za32_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w7, z0),
+ svsub_za32_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8, z0),
+ svsub_za32_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w11, z0),
+ svsub_za32_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w12, z0),
+ svsub_za32_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8 + 7, z0),
+ svsub_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8 + 8, z0),
+ svsub_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8 - 1, z0),
+ svsub_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8, z18),
+ svsub_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8, z23),
+ svsub_za32_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svuint32x2_t,
+ svsub_za32_u32_vg1x2 (w8, z28),
+ svsub_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (0, z0),
+ svsub_za32_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w0, z0),
+ svsub_za32_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w7, z0),
+ svsub_za32_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8, z0),
+ svsub_za32_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w11, z0),
+ svsub_za32_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w12, z0),
+ svsub_za32_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8 + 7, z0),
+ svsub_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8 + 8, z0),
+ svsub_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8 - 1, z0),
+ svsub_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** sub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8, z4),
+ svsub_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8, z18),
+ svsub_za32_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8, z23),
+ svsub_za32_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svuint32x4_t,
+ svsub_za32_u32_vg1x4 (w8, z28),
+ svsub_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (0, z0),
+ svsub_za64_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w0, z0),
+ svsub_za64_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w7, z0),
+ svsub_za64_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8, z0),
+ svsub_za64_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w11, z0),
+ svsub_za64_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w12, z0),
+ svsub_za64_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8 + 7, z0),
+ svsub_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8 + 8, z0),
+ svsub_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8 - 1, z0),
+ svsub_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** fsub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8, z18),
+ svsub_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8, z23),
+ svsub_za64_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** fsub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat64x2_t,
+ svsub_za64_f64_vg1x2 (w8, z28),
+ svsub_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (0, z0),
+ svsub_za64_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w0, z0),
+ svsub_za64_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w7, z0),
+ svsub_za64_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8, z0),
+ svsub_za64_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w11, z0),
+ svsub_za64_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w12, z0),
+ svsub_za64_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8 + 7, z0),
+ svsub_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8 + 8, z0),
+ svsub_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8 - 1, z0),
+ svsub_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** fsub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8, z4),
+ svsub_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8, z18),
+ svsub_za64_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8, z23),
+ svsub_za64_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** fsub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat64x4_t,
+ svsub_za64_f64_vg1x4 (w8, z28),
+ svsub_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (0, z0),
+ svsub_za64_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w0, z0),
+ svsub_za64_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w7, z0),
+ svsub_za64_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8, z0),
+ svsub_za64_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w11, z0),
+ svsub_za64_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w12, z0),
+ svsub_za64_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8 + 7, z0),
+ svsub_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8 + 8, z0),
+ svsub_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8 - 1, z0),
+ svsub_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8, z18),
+ svsub_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8, z23),
+ svsub_za64_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svint64x2_t,
+ svsub_za64_s64_vg1x2 (w8, z28),
+ svsub_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (0, z0),
+ svsub_za64_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w0, z0),
+ svsub_za64_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w7, z0),
+ svsub_za64_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8, z0),
+ svsub_za64_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w11, z0),
+ svsub_za64_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w12, z0),
+ svsub_za64_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8 + 7, z0),
+ svsub_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8 + 8, z0),
+ svsub_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8 - 1, z0),
+ svsub_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** sub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8, z4),
+ svsub_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8, z18),
+ svsub_za64_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8, z23),
+ svsub_za64_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svint64x4_t,
+ svsub_za64_s64_vg1x4 (w8, z28),
+ svsub_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (0, z0),
+ svsub_za64_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w0, z0),
+ svsub_za64_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w7, z0),
+ svsub_za64_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8, z0),
+ svsub_za64_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w11, z0),
+ svsub_za64_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w12, z0),
+ svsub_za64_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8 + 7, z0),
+ svsub_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8 + 8, z0),
+ svsub_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8 - 1, z0),
+ svsub_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8, z18),
+ svsub_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8, z23),
+ svsub_za64_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svuint64x2_t,
+ svsub_za64_u64_vg1x2 (w8, z28),
+ svsub_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (0, z0),
+ svsub_za64_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w0, z0),
+ svsub_za64_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w7, z0),
+ svsub_za64_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8, z0),
+ svsub_za64_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** sub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w11, z0),
+ svsub_za64_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w12, z0),
+ svsub_za64_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** sub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8 + 7, z0),
+ svsub_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8 + 8, z0),
+ svsub_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8 - 1, z0),
+ svsub_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** sub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8, z4),
+ svsub_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8, z18),
+ svsub_za64_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8, z23),
+ svsub_za64_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svuint64x4_t,
+ svsub_za64_u64_vg1x4 (w8, z28),
+ svsub_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sudot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_0_z0_z4_0, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (0, z0, z4, 0),
+ svsudot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** sudot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w0_z0_z7_1, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w0, z0, z7, 1),
+ svsudot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** sudot_lane_w8_z28_z4_2:
+** sudot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z28_z4_2, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8, z28, z4, 2),
+ svsudot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** sudot_lane_w8p7_z0_z4_3:
+** sudot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8p7_z0_z4_3, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8 + 7, z0, z4, 3),
+ svsudot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** sudot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8p8_z0_z4_0, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8 + 8, z0, z4, 0),
+ svsudot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** sudot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w0m1_z0_z4_1, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w0 - 1, z0, z4, 1),
+ svsudot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** sudot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sudot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (sudot_lane_w8_z4_z15_2, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8, z4, z15, 2),
+ svsudot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** sudot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sudot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z28_z16_3, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8, z28, z16, 3),
+ svsudot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** sudot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** sudot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z17_z7_0, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8, z17, z7, 0),
+ svsudot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** sudot_lane_w8_z22_z4_1:
+** sudot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z22_z4_1, svint8x2_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x2 (w8, z22, z4, 1),
+ svsudot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sudot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_0_z0_z4_0, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (0, z0, z4, 0),
+ svsudot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** sudot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w0_z0_z7_1, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w0, z0, z7, 1),
+ svsudot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** sudot_lane_w8_z28_z4_2:
+** sudot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z28_z4_2, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8, z28, z4, 2),
+ svsudot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** sudot_lane_w8p7_z0_z4_3:
+** sudot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8p7_z0_z4_3, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3),
+ svsudot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** sudot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8p8_z0_z4_0, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0),
+ svsudot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** sudot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w0m1_z0_z4_1, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1),
+ svsudot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** sudot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** sudot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (sudot_lane_w8_z4_z15_2, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8, z4, z15, 2),
+ svsudot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** sudot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** sudot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z28_z16_3, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8, z28, z16, 3),
+ svsudot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** sudot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sudot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z17_z7_0, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8, z17, z7, 0),
+ svsudot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** sudot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sudot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (sudot_lane_w8_z22_z4_1, svint8x4_t, svuint8_t,
+ svsudot_lane_za32_s8_vg1x4 (w8, z22, z4, 1),
+ svsudot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z4:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z4, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (0, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x2 (0, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w0_z0_z4:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z4, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w0, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x2 (w0, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w8_z0_z18:
+** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z18)),
+ svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z18)))
+
+/*
+** dot_w8_z4_z18:
+** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z4, svreinterpret_u8 (z18)),
+ svsudot_za32_vg1x2 (w8, z4, svreinterpret_u8 (z18)))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** usdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z23)),
+ svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z23)))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z23, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8, z23, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8_z18_z28:
+** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z18, svreinterpret_u8 (z28)),
+ svsudot_za32_vg1x2 (w8, z18, svreinterpret_u8 (z28)))
+
+/*
+** dot_w8_z28_z4:
+** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8, z28, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x2 (w8, z28, svreinterpret_u8 (z4)))
+
+/*
+** dot_w8p1_z4_z0:
+** usdot za\.s\[w8, 1, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8 + 1, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 1, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p2_z4_z0:
+** usdot za\.s\[w8, 2, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8 + 2, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 2, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w11p4_z4_z0:
+** usdot za\.s\[w11, 4, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w11 + 4, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w11 + 4, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p7_z4_z0:
+** usdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8 + 7, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 7, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p8_z0_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z0_z4, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8 + 8, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x2 (w8 + 8, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint8x2_t,
+ svsudot_za32_s8_vg1x2 (w8 - 1, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 - 1, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (0, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (0, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w0, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w0, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8_z1_z0:
+** sudot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sudot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8 + 1, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 1, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p2_z20_z0:
+** sudot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8 + 2, z20, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 2, z20, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w11p4_z27_z0:
+** sudot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w11 + 4, z27, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w11 + 4, z27, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sudot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8 + 7, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 7, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8 + 8, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w8 + 8, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w0 - 1, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x2 (w0 - 1, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sudot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z15)),
+ svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z15)))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sudot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x2_t, svint8_t,
+ svsudot_single_za32_s8_vg1x2 (w8, z20, svreinterpret_u8 (z16)),
+ svsudot_za32_vg1x2 (w8, z20, svreinterpret_u8 (z16)))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z4:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z4, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (0, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x4 (0, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w0_z0_z4:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z4, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w0, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x4 (w0, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w8_z4_z0:
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8, z4, svreinterpret_u8 (z0)))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z18)),
+ svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z18)))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z18, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8, z18, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z23)),
+ svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z23)))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z23, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8, z23, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8_z4_z28:
+** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z4, svreinterpret_u8 (z28)),
+ svsudot_za32_vg1x4 (w8, z4, svreinterpret_u8 (z28)))
+
+/*
+** dot_w8_z28_z0:
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8, z28, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8, z28, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p1_z4_z0:
+** usdot za\.s\[w8, 1, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8 + 1, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 1, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p2_z4_z0:
+** usdot za\.s\[w8, 2, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8 + 2, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 2, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w11p4_z4_z0:
+** usdot za\.s\[w11, 4, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w11 + 4, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w11 + 4, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p7_z4_z0:
+** usdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8 + 7, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 7, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_w8p8_z0_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z0_z4, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8 + 8, z0, svreinterpret_u8 (z4)),
+ svsudot_za32_vg1x4 (w8 + 8, z0, svreinterpret_u8 (z4)))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svint8x4_t,
+ svsudot_za32_s8_vg1x4 (w8 - 1, z4, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 - 1, z4, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (0, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (0, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w0, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w0, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8_z1_z0:
+** sudot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p1_z1_z0:
+** sudot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8 + 1, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 1, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p4_z20_z0:
+** sudot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8 + 4, z20, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 4, z20, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p6_z27_z0:
+** sudot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8 + 6, z27, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 6, z27, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p7_z1_z0:
+** sudot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8 + 7, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 7, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8 + 8, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w8 + 8, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w0 - 1, z1, svreinterpret_u8 (z0)),
+ svsudot_za32_vg1x4 (w0 - 1, z1, svreinterpret_u8 (z0)))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** sudot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z15)),
+ svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z15)))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** sudot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x4_t, svint8_t,
+ svsudot_single_za32_s8_vg1x4 (w8, z20, svreinterpret_u8 (z16)),
+ svsudot_za32_vg1x4 (w8, z20, svreinterpret_u8 (z16)))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** suvdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_0_z0_z4_0, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (0, z0, z4, 0),
+ svsuvdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** suvdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w0_z0_z7_1, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1),
+ svsuvdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** suvdot_lane_w8_z28_z4_2:
+** suvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8_z28_z4_2, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2),
+ svsuvdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** suvdot_lane_w8p7_z0_z4_3:
+** suvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8p7_z0_z4_3, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3),
+ svsuvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** suvdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8p8_z0_z4_0, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0),
+ svsuvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** suvdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w0m1_z0_z4_1, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1),
+ svsuvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** suvdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** suvdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (suvdot_lane_w8_z4_z15_2, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2),
+ svsuvdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** suvdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** suvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8_z28_z16_3, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3),
+ svsuvdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** suvdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** suvdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8_z17_z7_0, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0),
+ svsuvdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** suvdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** suvdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (suvdot_lane_w8_z22_z4_1, svint8x4_t, svuint8_t,
+ svsuvdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1),
+ svsuvdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+#ifndef TEST_SME2_ACLE_H
+#define TEST_SME2_ACLE_H 1
+
+#include "../../sme/acle-asm/test_sme_acle.h"
+
+#define TEST_ZA_X1(NAME, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w7 __asm ("w7"); \
+ register int w8 __asm ("w8"); \
+ register int w9 __asm ("w9"); \
+ register int w10 __asm ("w10"); \
+ register int w11 __asm ("w11"); \
+ register int w12 __asm ("w12"); \
+ register ZTYPE z0 __asm ("z0"); \
+ register ZTYPE z3 __asm ("z3"); \
+ register ZTYPE z7 __asm ("z7"); \
+ register ZTYPE z16 __asm ("z16"); \
+ register ZTYPE z23 __asm ("z23"); \
+ register ZTYPE z31 __asm ("z31"); \
+ __asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w9), \
+ "=r" (w10), "=r" (w11), "=r" (w12), \
+ "=w" (z0), "=w" (z3), "=w" (z7), \
+ "=w" (z16), "=w" (z23), "=w" (z31)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_ZA_XN(NAME, TTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w7 __asm ("w7"); \
+ register int w8 __asm ("w8"); \
+ register int w11 __asm ("w11"); \
+ register int w12 __asm ("w12"); \
+ register int w15 __asm ("w15"); \
+ register int w16 __asm ("w16"); \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z4 __asm ("z4"); \
+ register TTYPE z18 __asm ("z18"); \
+ register TTYPE z23 __asm ("z23"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w11), \
+ "=r" (w12), "=r" (w15), "=r" (w16), \
+ "=w" (z0), "=w" (z4), "=w" (z18), \
+ "=w" (z23), "=w" (z28)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_READ_ZA_XN(NAME, TTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w7 __asm ("w7"); \
+ register int w8 __asm ("w8"); \
+ register int w11 __asm ("w11"); \
+ register int w12 __asm ("w12"); \
+ register int w15 __asm ("w15"); \
+ register int w16 __asm ("w16"); \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z4 __asm ("z4"); \
+ register TTYPE z18 __asm ("z18"); \
+ register TTYPE z23 __asm ("z23"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w11), \
+ "=r" (w12), "=r" (w15), "=r" (w16)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0), "w" (z4), "w" (z18), \
+ "w" (z23), "w" (z28)); \
+ }
+
+#define TEST_ZA_SINGLE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w8 __asm ("w8"); \
+ register int w11 __asm ("w11"); \
+ register ZTYPE z0 __asm ("z0"); \
+ register TTYPE z1 __asm ("z1"); \
+ register ZTYPE z16 __asm ("z16"); \
+ register TTYPE z20 __asm ("z20"); \
+ register TTYPE z27 __asm ("z27"); \
+ __asm volatile ("" : "=r" (w8), "=r" (w11), "=w" (z0), \
+ "=w" (z1), "=w" (z16), "=w" (z20), \
+ "=w" (z27)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_ZA_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w8 __asm ("w8"); \
+ register TTYPE z0 __asm ("z0"); \
+ register ZTYPE z15 __asm ("z15"); \
+ __asm volatile ("" : "=r" (w8), "=w" (z0), "=w" (z15)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_ZA_LANE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w8 __asm ("w8"); \
+ register int w11 __asm ("w11"); \
+ register TTYPE z0 __asm ("z0"); \
+ register ZTYPE z4 __asm ("z4"); \
+ register ZTYPE z7 __asm ("z7"); \
+ register ZTYPE z16 __asm ("z16"); \
+ register TTYPE z17 __asm ("z17"); \
+ register TTYPE z22 __asm ("z22"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=r" (w8), "=r" (w11), "=w" (z0), \
+ "=w" (z4), "=w" (z7), "=w" (z16), \
+ "=w" (z17), "=w" (z22), "=w" (z28)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_ZA_LANE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (int w0)) \
+ { \
+ register int w8 __asm ("w8"); \
+ register TTYPE z4 __asm ("z4"); \
+ register ZTYPE z15 __asm ("z15"); \
+ __asm volatile ("" : "=r" (w8), "=w" (z4), "=w" (z15)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#endif
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.s - z1\.s}, z4\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint32x2_t, svint16_t, z0,
+ svunpk_s32_s16_x2 (z4),
+ svunpk_s32 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.s - z5\.s}, z0\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint16_t, svint32x2_t, z4,
+ svunpk_s32_s16_x2 (z0),
+ svunpk_s32 (z0))
+
+/*
+** unpk_z18_z23:
+** sunpk {z18\.s - z19\.s}, z23\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svint32x2_t, svint16_t, z18,
+ svunpk_s32_s16_x2 (z23),
+ svunpk_s32 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, z28\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint16_t, svint32x2_t, z23,
+ svunpk_s32_s16_x2 (z28),
+ svunpk_s32 (z28))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.s - z29\.s}, z4\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint32x2_t, svint16_t, z28,
+ svunpk_s32_s16_x2 (z4),
+ svunpk_s32 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.s - z3\.s}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint32x4_t, svint16x2_t, z0,
+ svunpk_s32_s16_x4 (z4),
+ svunpk_s32 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.s - z7\.s}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint16x2_t, svint32x4_t, z4,
+ svunpk_s32_s16_x4 (z0),
+ svunpk_s32 (z0))
+
+/*
+** unpk_z4_z18:
+** sunpk {z4\.s - z7\.s}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svint16x2_t, svint32x4_t, z4,
+ svunpk_s32_s16_x4 (z18),
+ svunpk_s32 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sunpk {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svint32x4_t, svint16x2_t, z28,
+ svunpk_s32_s16_x4 (z23),
+ svunpk_s32 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint16x2_t, svint32x4_t, z23,
+ svunpk_s32_s16_x4 (z28),
+ svunpk_s32 (z28))
+
+/*
+** unpk_z23_z18:
+** sunpk {z[^\n]+}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svint16x2_t, svint32x4_t, z23,
+ svunpk_s32_s16_x4 (z18),
+ svunpk_s32 (z18))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.s - z31\.s}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint32x4_t, svint16x2_t, z28,
+ svunpk_s32_s16_x4 (z4),
+ svunpk_s32 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.d - z1\.d}, z4\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint64x2_t, svint32_t, z0,
+ svunpk_s64_s32_x2 (z4),
+ svunpk_s64 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.d - z5\.d}, z0\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint32_t, svint64x2_t, z4,
+ svunpk_s64_s32_x2 (z0),
+ svunpk_s64 (z0))
+
+/*
+** unpk_z18_z23:
+** sunpk {z18\.d - z19\.d}, z23\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svint64x2_t, svint32_t, z18,
+ svunpk_s64_s32_x2 (z23),
+ svunpk_s64 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, z28\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint32_t, svint64x2_t, z23,
+ svunpk_s64_s32_x2 (z28),
+ svunpk_s64 (z28))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.d - z29\.d}, z4\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint64x2_t, svint32_t, z28,
+ svunpk_s64_s32_x2 (z4),
+ svunpk_s64 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.d - z3\.d}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint64x4_t, svint32x2_t, z0,
+ svunpk_s64_s32_x4 (z4),
+ svunpk_s64 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.d - z7\.d}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint32x2_t, svint64x4_t, z4,
+ svunpk_s64_s32_x4 (z0),
+ svunpk_s64 (z0))
+
+/*
+** unpk_z4_z18:
+** sunpk {z4\.d - z7\.d}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svint32x2_t, svint64x4_t, z4,
+ svunpk_s64_s32_x4 (z18),
+ svunpk_s64 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sunpk {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svint64x4_t, svint32x2_t, z28,
+ svunpk_s64_s32_x4 (z23),
+ svunpk_s64 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint32x2_t, svint64x4_t, z23,
+ svunpk_s64_s32_x4 (z28),
+ svunpk_s64 (z28))
+
+/*
+** unpk_z23_z18:
+** sunpk {z[^\n]+}, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svint32x2_t, svint64x4_t, z23,
+ svunpk_s64_s32_x4 (z18),
+ svunpk_s64 (z18))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.d - z31\.d}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint64x4_t, svint32x2_t, z28,
+ svunpk_s64_s32_x4 (z4),
+ svunpk_s64 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.h - z1\.h}, z4\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint16x2_t, svint8_t, z0,
+ svunpk_s16_s8_x2 (z4),
+ svunpk_s16 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.h - z5\.h}, z0\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint8_t, svint16x2_t, z4,
+ svunpk_s16_s8_x2 (z0),
+ svunpk_s16 (z0))
+
+/*
+** unpk_z18_z23:
+** sunpk {z18\.h - z19\.h}, z23\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svint16x2_t, svint8_t, z18,
+ svunpk_s16_s8_x2 (z23),
+ svunpk_s16 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, z28\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint8_t, svint16x2_t, z23,
+ svunpk_s16_s8_x2 (z28),
+ svunpk_s16 (z28))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.h - z29\.h}, z4\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint16x2_t, svint8_t, z28,
+ svunpk_s16_s8_x2 (z4),
+ svunpk_s16 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** sunpk {z0\.h - z3\.h}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svint16x4_t, svint8x2_t, z0,
+ svunpk_s16_s8_x4 (z4),
+ svunpk_s16 (z4))
+
+/*
+** unpk_z4_z0:
+** sunpk {z4\.h - z7\.h}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svint8x2_t, svint16x4_t, z4,
+ svunpk_s16_s8_x4 (z0),
+ svunpk_s16 (z0))
+
+/*
+** unpk_z4_z18:
+** sunpk {z4\.h - z7\.h}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svint8x2_t, svint16x4_t, z4,
+ svunpk_s16_s8_x4 (z18),
+ svunpk_s16 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** sunpk {z28\.h - z31\.h}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svint16x4_t, svint8x2_t, z28,
+ svunpk_s16_s8_x4 (z23),
+ svunpk_s16 (z23))
+
+/*
+** unpk_z23_z28:
+** sunpk [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svint8x2_t, svint16x4_t, z23,
+ svunpk_s16_s8_x4 (z28),
+ svunpk_s16 (z28))
+
+/*
+** unpk_z23_z18:
+** sunpk {z[^\n]+}, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svint8x2_t, svint16x4_t, z23,
+ svunpk_s16_s8_x4 (z18),
+ svunpk_s16 (z18))
+
+/*
+** unpk_z28_z4:
+** sunpk {z28\.h - z31\.h}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svint16x4_t, svint8x2_t, z28,
+ svunpk_s16_s8_x4 (z4),
+ svunpk_s16 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.s - z1\.s}, z4\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint32x2_t, svuint16_t, z0,
+ svunpk_u32_u16_x2 (z4),
+ svunpk_u32 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.s - z5\.s}, z0\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint16_t, svuint32x2_t, z4,
+ svunpk_u32_u16_x2 (z0),
+ svunpk_u32 (z0))
+
+/*
+** unpk_z18_z23:
+** uunpk {z18\.s - z19\.s}, z23\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svuint32x2_t, svuint16_t, z18,
+ svunpk_u32_u16_x2 (z23),
+ svunpk_u32 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, z28\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint16_t, svuint32x2_t, z23,
+ svunpk_u32_u16_x2 (z28),
+ svunpk_u32 (z28))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.s - z29\.s}, z4\.h
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint32x2_t, svuint16_t, z28,
+ svunpk_u32_u16_x2 (z4),
+ svunpk_u32 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.s - z3\.s}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint32x4_t, svuint16x2_t, z0,
+ svunpk_u32_u16_x4 (z4),
+ svunpk_u32 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.s - z7\.s}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint16x2_t, svuint32x4_t, z4,
+ svunpk_u32_u16_x4 (z0),
+ svunpk_u32 (z0))
+
+/*
+** unpk_z4_z18:
+** uunpk {z4\.s - z7\.s}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svuint16x2_t, svuint32x4_t, z4,
+ svunpk_u32_u16_x4 (z18),
+ svunpk_u32 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** uunpk {z28\.s - z31\.s}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svuint32x4_t, svuint16x2_t, z28,
+ svunpk_u32_u16_x4 (z23),
+ svunpk_u32 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, {z28\.h - z29\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint16x2_t, svuint32x4_t, z23,
+ svunpk_u32_u16_x4 (z28),
+ svunpk_u32 (z28))
+
+/*
+** unpk_z23_z18:
+** uunpk {z[^\n]+}, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svuint16x2_t, svuint32x4_t, z23,
+ svunpk_u32_u16_x4 (z18),
+ svunpk_u32 (z18))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.s - z31\.s}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint32x4_t, svuint16x2_t, z28,
+ svunpk_u32_u16_x4 (z4),
+ svunpk_u32 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.d - z1\.d}, z4\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint64x2_t, svuint32_t, z0,
+ svunpk_u64_u32_x2 (z4),
+ svunpk_u64 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.d - z5\.d}, z0\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint32_t, svuint64x2_t, z4,
+ svunpk_u64_u32_x2 (z0),
+ svunpk_u64 (z0))
+
+/*
+** unpk_z18_z23:
+** uunpk {z18\.d - z19\.d}, z23\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svuint64x2_t, svuint32_t, z18,
+ svunpk_u64_u32_x2 (z23),
+ svunpk_u64 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, z28\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint32_t, svuint64x2_t, z23,
+ svunpk_u64_u32_x2 (z28),
+ svunpk_u64 (z28))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.d - z29\.d}, z4\.s
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint64x2_t, svuint32_t, z28,
+ svunpk_u64_u32_x2 (z4),
+ svunpk_u64 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.d - z3\.d}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint64x4_t, svuint32x2_t, z0,
+ svunpk_u64_u32_x4 (z4),
+ svunpk_u64 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.d - z7\.d}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint32x2_t, svuint64x4_t, z4,
+ svunpk_u64_u32_x4 (z0),
+ svunpk_u64 (z0))
+
+/*
+** unpk_z4_z18:
+** uunpk {z4\.d - z7\.d}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svuint32x2_t, svuint64x4_t, z4,
+ svunpk_u64_u32_x4 (z18),
+ svunpk_u64 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** uunpk {z28\.d - z31\.d}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svuint64x4_t, svuint32x2_t, z28,
+ svunpk_u64_u32_x4 (z23),
+ svunpk_u64 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, {z28\.s - z29\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint32x2_t, svuint64x4_t, z23,
+ svunpk_u64_u32_x4 (z28),
+ svunpk_u64 (z28))
+
+/*
+** unpk_z23_z18:
+** uunpk {z[^\n]+}, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svuint32x2_t, svuint64x4_t, z23,
+ svunpk_u64_u32_x4 (z18),
+ svunpk_u64 (z18))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.d - z31\.d}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint64x4_t, svuint32x2_t, z28,
+ svunpk_u64_u32_x4 (z4),
+ svunpk_u64 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.h - z1\.h}, z4\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint16x2_t, svuint8_t, z0,
+ svunpk_u16_u8_x2 (z4),
+ svunpk_u16 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.h - z5\.h}, z0\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint8_t, svuint16x2_t, z4,
+ svunpk_u16_u8_x2 (z0),
+ svunpk_u16 (z0))
+
+/*
+** unpk_z18_z23:
+** uunpk {z18\.h - z19\.h}, z23\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z18_z23, svuint16x2_t, svuint8_t, z18,
+ svunpk_u16_u8_x2 (z23),
+ svunpk_u16 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, z28\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint8_t, svuint16x2_t, z23,
+ svunpk_u16_u8_x2 (z28),
+ svunpk_u16 (z28))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.h - z29\.h}, z4\.b
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint16x2_t, svuint8_t, z28,
+ svunpk_u16_u8_x2 (z4),
+ svunpk_u16 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** unpk_z0_z4:
+** uunpk {z0\.h - z3\.h}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z0_z4, svuint16x4_t, svuint8x2_t, z0,
+ svunpk_u16_u8_x4 (z4),
+ svunpk_u16 (z4))
+
+/*
+** unpk_z4_z0:
+** uunpk {z4\.h - z7\.h}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z0, svuint8x2_t, svuint16x4_t, z4,
+ svunpk_u16_u8_x4 (z0),
+ svunpk_u16 (z0))
+
+/*
+** unpk_z4_z18:
+** uunpk {z4\.h - z7\.h}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z4_z18, svuint8x2_t, svuint16x4_t, z4,
+ svunpk_u16_u8_x4 (z18),
+ svunpk_u16 (z18))
+
+/*
+** unpk_z28_z23:
+** mov [^\n]+
+** mov [^\n]+
+** uunpk {z28\.h - z31\.h}, [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z23, svuint16x4_t, svuint8x2_t, z28,
+ svunpk_u16_u8_x4 (z23),
+ svunpk_u16 (z23))
+
+/*
+** unpk_z23_z28:
+** uunpk [^\n]+, {z28\.b - z29\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z28, svuint8x2_t, svuint16x4_t, z23,
+ svunpk_u16_u8_x4 (z28),
+ svunpk_u16 (z28))
+
+/*
+** unpk_z23_z18:
+** uunpk {z[^\n]+}, {z18\.b - z19\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (unpk_z23_z18, svuint8x2_t, svuint16x4_t, z23,
+ svunpk_u16_u8_x4 (z18),
+ svunpk_u16 (z18))
+
+/*
+** unpk_z28_z4:
+** uunpk {z28\.h - z31\.h}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_DUAL_XN (unpk_z28_z4, svuint16x4_t, svuint8x2_t, z28,
+ svunpk_u16_u8_x4 (z4),
+ svunpk_u16 (z4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** usdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_0_z0_z4_0, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (0, z0, z4, 0),
+ svusdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** usdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w0_z0_z7_1, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w0, z0, z7, 1),
+ svusdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** usdot_lane_w8_z28_z4_2:
+** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z28_z4_2, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8, z28, z4, 2),
+ svusdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** usdot_lane_w8p7_z0_z4_3:
+** usdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8p7_z0_z4_3, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8 + 7, z0, z4, 3),
+ svusdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** usdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8p8_z0_z4_0, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8 + 8, z0, z4, 0),
+ svusdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** usdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w0m1_z0_z4_1, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w0 - 1, z0, z4, 1),
+ svusdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** usdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (usdot_lane_w8_z4_z15_2, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8, z4, z15, 2),
+ svusdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** usdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z28_z16_3, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8, z28, z16, 3),
+ svusdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** usdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** usdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z17_z7_0, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8, z17, z7, 0),
+ svusdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** usdot_lane_w8_z22_z4_1:
+** usdot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z22_z4_1, svuint8x2_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x2 (w8, z22, z4, 1),
+ svusdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** usdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_0_z0_z4_0, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (0, z0, z4, 0),
+ svusdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** usdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w0_z0_z7_1, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1),
+ svusdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** usdot_lane_w8_z28_z4_2:
+** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z28_z4_2, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2),
+ svusdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** usdot_lane_w8p7_z0_z4_3:
+** usdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8p7_z0_z4_3, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3),
+ svusdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** usdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8p8_z0_z4_0, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0),
+ svusdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** usdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w0m1_z0_z4_1, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1),
+ svusdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** usdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (usdot_lane_w8_z4_z15_2, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2),
+ svusdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** usdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z28_z16_3, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3),
+ svusdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** usdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z17_z7_0, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0),
+ svusdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** usdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usdot_lane_w8_z22_z4_1, svuint8x4_t, svint8_t,
+ svusdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1),
+ svusdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z4:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z4, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (0, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x2 (0, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w0_z0_z4:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z4, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w0, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x2 (w0, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w8_z0_z18:
+** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z18)),
+ svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z18)))
+
+/*
+** dot_w8_z4_z18:
+** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z18, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z4, svreinterpret_s8 (z18)),
+ svusdot_za32_vg1x2 (w8, z4, svreinterpret_s8 (z18)))
+
+/* Leave the assembler to check for correctness with misaligned registers. */
+
+/*
+** dot_w8_z0_z23:
+** ...
+** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z23)),
+ svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z23)))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z23, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8, z23, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8_z18_z28:
+** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z28, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z18, svreinterpret_s8 (z28)),
+ svusdot_za32_vg1x2 (w8, z18, svreinterpret_s8 (z28)))
+
+/*
+** dot_w8_z28_z4:
+** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z4, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8, z28, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x2 (w8, z28, svreinterpret_s8 (z4)))
+
+/*
+** dot_w8p1_z4_z0:
+** usdot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8 + 1, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 1, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p2_z4_z0:
+** usdot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8 + 2, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 2, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w11p4_z4_z0:
+** usdot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w11 + 4, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w11 + 4, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p7_z4_z0:
+** usdot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8 + 7, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 7, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p8_z0_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z0_z4, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8 + 8, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x2 (w8 + 8, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x2_t,
+ svusdot_za32_u8_vg1x2 (w8 - 1, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 - 1, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (0, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (0, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w0, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w0, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8_z1_z0:
+** usdot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p1_z1_z0:
+** usdot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8 + 1, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 1, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p2_z20_z0:
+** usdot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8 + 2, z20, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 2, z20, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w11p4_z27_z0:
+** usdot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w11 + 4, z27, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w11 + 4, z27, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p7_z1_z0:
+** usdot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8 + 7, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 7, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8 + 8, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w8 + 8, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w0 - 1, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x2 (w0 - 1, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z15)),
+ svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z15)))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** usdot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x2_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x2 (w8, z20, svreinterpret_s8 (z16)),
+ svusdot_za32_vg1x2 (w8, z20, svreinterpret_s8 (z16)))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** dot_0_z0_z4:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_0_z0_z4, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (0, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x4 (0, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w0_z0_z4:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w0_z0_z4, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w0, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x4 (w0, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w8_z4_z0:
+** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8, z4, svreinterpret_s8 (z0)))
+
+/* Leave the assembler to check for correctness with misaligned registers. */
+
+/*
+** dot_w8_z0_z18:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z18, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z18)),
+ svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z18)))
+
+/*
+** dot_w8_z18_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z18_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z18, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8, z18, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8_z0_z23:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+
+** ret
+*/
+TEST_ZA_XN (dot_w8_z0_z23, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z23)),
+ svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z23)))
+
+/*
+** dot_w8_z23_z0:
+** ...
+** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z23_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z23, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8, z23, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8_z4_z28:
+** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z4_z28, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z4, svreinterpret_s8 (z28)),
+ svusdot_za32_vg1x4 (w8, z4, svreinterpret_s8 (z28)))
+
+/*
+** dot_w8_z28_z0:
+** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8_z28_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8, z28, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8, z28, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p1_z4_z0:
+** usdot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8 + 1, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 1, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p2_z4_z0:
+** usdot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8 + 2, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 2, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w11p4_z4_z0:
+** usdot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w11 + 4, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w11 + 4, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p7_z4_z0:
+** usdot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8 + 7, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 7, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_w8p8_z0_z4:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8p8_z0_z4, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8 + 8, z0, svreinterpret_s8 (z4)),
+ svusdot_za32_vg1x4 (w8 + 8, z0, svreinterpret_s8 (z4)))
+
+/*
+** dot_w8m1_z4_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x4_t,
+ svusdot_za32_u8_vg1x4 (w8 - 1, z4, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 - 1, z4, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_0_z1_z0:
+** mov (w8|w9|w10|w11), #?0
+** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (0, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (0, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w0_z1_z0:
+** mov (w8|w9|w10|w11), w0
+** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w0, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w0, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8_z1_z0:
+** usdot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p1_z1_z0:
+** usdot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8 + 1, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 1, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p4_z20_z0:
+** usdot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8 + 4, z20, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 4, z20, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p6_z27_z0:
+** usdot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8 + 6, z27, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 6, z27, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p7_z1_z0:
+** usdot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8 + 7, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 7, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8p8_z1_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8 + 8, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w8 + 8, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w0m1_z1_z0:
+** sub (w8|w9|w10|w11), w0, #?1
+** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w0 - 1, z1, svreinterpret_s8 (z0)),
+ svusdot_za32_vg1x4 (w0 - 1, z1, svreinterpret_s8 (z0)))
+
+/*
+** dot_single_w8_z0_z15:
+** str d15, \[sp, #?-16\]!
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z15)),
+ svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z15)))
+
+/*
+** dot_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** usdot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b
+** ret
+*/
+TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x4_t, svuint8_t,
+ svusdot_single_za32_u8_vg1x4 (w8, z20, svreinterpret_s8 (z16)),
+ svusdot_za32_vg1x4 (w8, z20, svreinterpret_s8 (z16)))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** usvdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_0_z0_z4_0, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (0, z0, z4, 0),
+ svusvdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** usvdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w0_z0_z7_1, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1),
+ svusvdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** usvdot_lane_w8_z28_z4_2:
+** usvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8_z28_z4_2, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2),
+ svusvdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** usvdot_lane_w8p7_z0_z4_3:
+** usvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8p7_z0_z4_3, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3),
+ svusvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** usvdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8p8_z0_z4_0, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0),
+ svusvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** usvdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w0m1_z0_z4_1, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1),
+ svusvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** usvdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** usvdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (usvdot_lane_w8_z4_z15_2, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2),
+ svusvdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** usvdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** usvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8_z28_z16_3, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3),
+ svusvdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** usvdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** usvdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8_z17_z7_0, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0),
+ svusvdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** usvdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** usvdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (usvdot_lane_w8_z22_z4_1, svuint8x4_t, svint8_t,
+ svusvdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1),
+ svusvdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z0_z0, svbfloat16x2_t, z0,
+ svuzp_bf16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (uzp_z0_z4, svbfloat16x2_t, z0,
+ svuzp_bf16_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z4_z18, svbfloat16x2_t, z4,
+ svuzp_bf16_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (uzp_z18_z23, svbfloat16x2_t, z18,
+ svuzp_bf16_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svbfloat16x2_t, z23,
+ svuzp_bf16_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0, svbfloat16x2_t, z28,
+ svuzp_bf16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svbfloat16x2_t, z28,
+ svuzp_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svbfloat16x2_t, z28,
+ svuzp_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svbfloat16x4_t, z0,
+ svuzp_bf16_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svbfloat16x4_t, z0,
+ svuzp_bf16_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svbfloat16x4_t, z4,
+ svuzp_bf16_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svbfloat16x4_t, z18,
+ svuzp_bf16_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svbfloat16x4_t, z23,
+ svuzp_bf16_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svbfloat16x4_t, z28,
+ svuzp_bf16_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat16x2_t, z0,
+ svuzp_f16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat16x2_t, z0,
+ svuzp_f16_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat16x2_t, z4,
+ svuzp_f16_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat16x2_t, z18,
+ svuzp_f16_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat16x2_t, z23,
+ svuzp_f16_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat16x2_t, z28,
+ svuzp_f16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svfloat16x2_t, z28,
+ svuzp_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svfloat16x2_t, z28,
+ svuzp_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat16x4_t, z0,
+ svuzp_f16_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat16x4_t, z0,
+ svuzp_f16_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat16x4_t, z4,
+ svuzp_f16_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat16x4_t, z18,
+ svuzp_f16_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat16x4_t, z23,
+ svuzp_f16_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat16x4_t, z28,
+ svuzp_f16_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat32x2_t, z0,
+ svuzp_f32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat32x2_t, z0,
+ svuzp_f32_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat32x2_t, z4,
+ svuzp_f32_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat32x2_t, z18,
+ svuzp_f32_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat32x2_t, z23,
+ svuzp_f32_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat32x2_t, z28,
+ svuzp_f32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svfloat32x2_t, z28,
+ svuzp_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svfloat32x2_t, z28,
+ svuzp_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat32x4_t, z0,
+ svuzp_f32_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat32x4_t, z0,
+ svuzp_f32_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat32x4_t, z4,
+ svuzp_f32_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat32x4_t, z18,
+ svuzp_f32_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat32x4_t, z23,
+ svuzp_f32_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat32x4_t, z28,
+ svuzp_f32_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat64x2_t, z0,
+ svuzp_f64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat64x2_t, z0,
+ svuzp_f64_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat64x2_t, z4,
+ svuzp_f64_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat64x2_t, z18,
+ svuzp_f64_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat64x2_t, z23,
+ svuzp_f64_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat64x2_t, z28,
+ svuzp_f64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svfloat64x2_t, z28,
+ svuzp_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svfloat64x2_t, z28,
+ svuzp_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svfloat64x4_t, z0,
+ svuzp_f64_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svfloat64x4_t, z0,
+ svuzp_f64_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svfloat64x4_t, z4,
+ svuzp_f64_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svfloat64x4_t, z18,
+ svuzp_f64_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svfloat64x4_t, z23,
+ svuzp_f64_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svfloat64x4_t, z28,
+ svuzp_f64_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint16x2_t, z0,
+ svuzp_s16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint16x2_t, z0,
+ svuzp_s16_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint16x2_t, z4,
+ svuzp_s16_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint16x2_t, z18,
+ svuzp_s16_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint16x2_t, z23,
+ svuzp_s16_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint16x2_t, z28,
+ svuzp_s16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svint16x2_t, z28,
+ svuzp_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svint16x2_t, z28,
+ svuzp_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint16x4_t, z0,
+ svuzp_s16_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint16x4_t, z0,
+ svuzp_s16_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint16x4_t, z4,
+ svuzp_s16_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint16x4_t, z18,
+ svuzp_s16_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint16x4_t, z23,
+ svuzp_s16_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint16x4_t, z28,
+ svuzp_s16_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint32x2_t, z0,
+ svuzp_s32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint32x2_t, z0,
+ svuzp_s32_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint32x2_t, z4,
+ svuzp_s32_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint32x2_t, z18,
+ svuzp_s32_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint32x2_t, z23,
+ svuzp_s32_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint32x2_t, z28,
+ svuzp_s32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svint32x2_t, z28,
+ svuzp_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svint32x2_t, z28,
+ svuzp_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint32x4_t, z0,
+ svuzp_s32_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint32x4_t, z0,
+ svuzp_s32_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint32x4_t, z4,
+ svuzp_s32_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint32x4_t, z18,
+ svuzp_s32_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint32x4_t, z23,
+ svuzp_s32_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint32x4_t, z28,
+ svuzp_s32_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint64x2_t, z0,
+ svuzp_s64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint64x2_t, z0,
+ svuzp_s64_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint64x2_t, z4,
+ svuzp_s64_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint64x2_t, z18,
+ svuzp_s64_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint64x2_t, z23,
+ svuzp_s64_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint64x2_t, z28,
+ svuzp_s64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svint64x2_t, z28,
+ svuzp_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svint64x2_t, z28,
+ svuzp_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint64x4_t, z0,
+ svuzp_s64_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint64x4_t, z0,
+ svuzp_s64_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint64x4_t, z4,
+ svuzp_s64_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint64x4_t, z18,
+ svuzp_s64_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint64x4_t, z23,
+ svuzp_s64_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint64x4_t, z28,
+ svuzp_s64_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint8x2_t, z0,
+ svuzp_s8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint8x2_t, z0,
+ svuzp_s8_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint8x2_t, z4,
+ svuzp_s8_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint8x2_t, z18,
+ svuzp_s8_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint8x2_t, z23,
+ svuzp_s8_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint8x2_t, z28,
+ svuzp_s8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svint8x2_t, z28,
+ svuzp_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svint8x2_t, z28,
+ svuzp_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svint8x4_t, z0,
+ svuzp_s8_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svint8x4_t, z0,
+ svuzp_s8_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svint8x4_t, z4,
+ svuzp_s8_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svint8x4_t, z18,
+ svuzp_s8_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svint8x4_t, z23,
+ svuzp_s8_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svint8x4_t, z28,
+ svuzp_s8_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint16x2_t, z0,
+ svuzp_u16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint16x2_t, z0,
+ svuzp_u16_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint16x2_t, z4,
+ svuzp_u16_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint16x2_t, z18,
+ svuzp_u16_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint16x2_t, z23,
+ svuzp_u16_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint16x2_t, z28,
+ svuzp_u16_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svuint16x2_t, z28,
+ svuzp_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svuint16x2_t, z28,
+ svuzp_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint16x4_t, z0,
+ svuzp_u16_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint16x4_t, z0,
+ svuzp_u16_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint16x4_t, z4,
+ svuzp_u16_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint16x4_t, z18,
+ svuzp_u16_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint16x4_t, z23,
+ svuzp_u16_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint16x4_t, z28,
+ svuzp_u16_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint32x2_t, z0,
+ svuzp_u32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint32x2_t, z0,
+ svuzp_u32_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint32x2_t, z4,
+ svuzp_u32_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint32x2_t, z18,
+ svuzp_u32_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint32x2_t, z23,
+ svuzp_u32_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint32x2_t, z28,
+ svuzp_u32_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svuint32x2_t, z28,
+ svuzp_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svuint32x2_t, z28,
+ svuzp_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint32x4_t, z0,
+ svuzp_u32_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint32x4_t, z0,
+ svuzp_u32_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint32x4_t, z4,
+ svuzp_u32_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint32x4_t, z18,
+ svuzp_u32_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint32x4_t, z23,
+ svuzp_u32_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint32x4_t, z28,
+ svuzp_u32_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint64x2_t, z0,
+ svuzp_u64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint64x2_t, z0,
+ svuzp_u64_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint64x2_t, z4,
+ svuzp_u64_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint64x2_t, z18,
+ svuzp_u64_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint64x2_t, z23,
+ svuzp_u64_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint64x2_t, z28,
+ svuzp_u64_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svuint64x2_t, z28,
+ svuzp_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svuint64x2_t, z28,
+ svuzp_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint64x4_t, z0,
+ svuzp_u64_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint64x4_t, z0,
+ svuzp_u64_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint64x4_t, z4,
+ svuzp_u64_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint64x4_t, z18,
+ svuzp_u64_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint64x4_t, z23,
+ svuzp_u64_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint64x4_t, z28,
+ svuzp_u64_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint8x2_t, z0,
+ svuzp_u8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint8x2_t, z0,
+ svuzp_u8_x2 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** uzp {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint8x2_t, z4,
+ svuzp_u8_x2 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** uzp {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint8x2_t, z18,
+ svuzp_u8_x2 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint8x2_t, z23,
+ svuzp_u8_x2 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint8x2_t, z28,
+ svuzp_u8_x2 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z28_z0_z23:
+** uzp {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (uzp_z28_z0_z23, svuint8x2_t, z28,
+ svuzp_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzp_z28_z5_z19:
+** uzp {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (uzp_z28_z5_z19, svuint8x2_t, z28,
+ svuzp_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzp_z0_z0:
+** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z0, svuint8x4_t, z0,
+ svuzp_u8_x4 (z0),
+ svuzp (z0))
+
+/*
+** uzp_z0_z4:
+** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (uzp_z0_z4, svuint8x4_t, z0,
+ svuzp_u8_x4 (z4),
+ svuzp (z4))
+
+/*
+** uzp_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (uzp_z4_z18, svuint8x4_t, z4,
+ svuzp_u8_x4 (z18),
+ svuzp (z18))
+
+/*
+** uzp_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z18_z23, svuint8x4_t, z18,
+ svuzp_u8_x4 (z23),
+ svuzp (z23))
+
+/*
+** uzp_z23_z28:
+** uzp [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzp_z23_z28, svuint8x4_t, z23,
+ svuzp_u8_x4 (z28),
+ svuzp (z28))
+
+/*
+** uzp_z28_z0:
+** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (uzp_z28_z0, svuint8x4_t, z28,
+ svuzp_u8_x4 (z0),
+ svuzp (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svbfloat16x2_t, z0,
+ svuzpq_bf16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svbfloat16x2_t, z0,
+ svuzpq_bf16_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svbfloat16x2_t, z4,
+ svuzpq_bf16_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svbfloat16x2_t, z18,
+ svuzpq_bf16_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svbfloat16x2_t, z23,
+ svuzpq_bf16_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svbfloat16x2_t, z28,
+ svuzpq_bf16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svbfloat16x2_t, z28,
+ svuzpq_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svbfloat16x2_t, z28,
+ svuzpq_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svbfloat16x4_t, z0,
+ svuzpq_bf16_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svbfloat16x4_t, z0,
+ svuzpq_bf16_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svbfloat16x4_t, z4,
+ svuzpq_bf16_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svbfloat16x4_t, z18,
+ svuzpq_bf16_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svbfloat16x4_t, z23,
+ svuzpq_bf16_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svbfloat16x4_t, z28,
+ svuzpq_bf16_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat16x2_t, z0,
+ svuzpq_f16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat16x2_t, z0,
+ svuzpq_f16_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat16x2_t, z4,
+ svuzpq_f16_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat16x2_t, z18,
+ svuzpq_f16_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat16x2_t, z23,
+ svuzpq_f16_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat16x2_t, z28,
+ svuzpq_f16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svfloat16x2_t, z28,
+ svuzpq_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svfloat16x2_t, z28,
+ svuzpq_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat16x4_t, z0,
+ svuzpq_f16_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat16x4_t, z0,
+ svuzpq_f16_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat16x4_t, z4,
+ svuzpq_f16_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat16x4_t, z18,
+ svuzpq_f16_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat16x4_t, z23,
+ svuzpq_f16_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat16x4_t, z28,
+ svuzpq_f16_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat32x2_t, z0,
+ svuzpq_f32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat32x2_t, z0,
+ svuzpq_f32_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat32x2_t, z4,
+ svuzpq_f32_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat32x2_t, z18,
+ svuzpq_f32_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat32x2_t, z23,
+ svuzpq_f32_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat32x2_t, z28,
+ svuzpq_f32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svfloat32x2_t, z28,
+ svuzpq_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svfloat32x2_t, z28,
+ svuzpq_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat32x4_t, z0,
+ svuzpq_f32_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat32x4_t, z0,
+ svuzpq_f32_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat32x4_t, z4,
+ svuzpq_f32_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat32x4_t, z18,
+ svuzpq_f32_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat32x4_t, z23,
+ svuzpq_f32_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat32x4_t, z28,
+ svuzpq_f32_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat64x2_t, z0,
+ svuzpq_f64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat64x2_t, z0,
+ svuzpq_f64_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat64x2_t, z4,
+ svuzpq_f64_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat64x2_t, z18,
+ svuzpq_f64_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat64x2_t, z23,
+ svuzpq_f64_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat64x2_t, z28,
+ svuzpq_f64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svfloat64x2_t, z28,
+ svuzpq_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svfloat64x2_t, z28,
+ svuzpq_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svfloat64x4_t, z0,
+ svuzpq_f64_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svfloat64x4_t, z0,
+ svuzpq_f64_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svfloat64x4_t, z4,
+ svuzpq_f64_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svfloat64x4_t, z18,
+ svuzpq_f64_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svfloat64x4_t, z23,
+ svuzpq_f64_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svfloat64x4_t, z28,
+ svuzpq_f64_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint16x2_t, z0,
+ svuzpq_s16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint16x2_t, z0,
+ svuzpq_s16_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint16x2_t, z4,
+ svuzpq_s16_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint16x2_t, z18,
+ svuzpq_s16_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint16x2_t, z23,
+ svuzpq_s16_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint16x2_t, z28,
+ svuzpq_s16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svint16x2_t, z28,
+ svuzpq_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svint16x2_t, z28,
+ svuzpq_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint16x4_t, z0,
+ svuzpq_s16_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint16x4_t, z0,
+ svuzpq_s16_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint16x4_t, z4,
+ svuzpq_s16_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint16x4_t, z18,
+ svuzpq_s16_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint16x4_t, z23,
+ svuzpq_s16_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint16x4_t, z28,
+ svuzpq_s16_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint32x2_t, z0,
+ svuzpq_s32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint32x2_t, z0,
+ svuzpq_s32_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint32x2_t, z4,
+ svuzpq_s32_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint32x2_t, z18,
+ svuzpq_s32_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint32x2_t, z23,
+ svuzpq_s32_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint32x2_t, z28,
+ svuzpq_s32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svint32x2_t, z28,
+ svuzpq_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svint32x2_t, z28,
+ svuzpq_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint32x4_t, z0,
+ svuzpq_s32_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint32x4_t, z0,
+ svuzpq_s32_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint32x4_t, z4,
+ svuzpq_s32_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint32x4_t, z18,
+ svuzpq_s32_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint32x4_t, z23,
+ svuzpq_s32_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint32x4_t, z28,
+ svuzpq_s32_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint64x2_t, z0,
+ svuzpq_s64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint64x2_t, z0,
+ svuzpq_s64_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint64x2_t, z4,
+ svuzpq_s64_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint64x2_t, z18,
+ svuzpq_s64_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint64x2_t, z23,
+ svuzpq_s64_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint64x2_t, z28,
+ svuzpq_s64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svint64x2_t, z28,
+ svuzpq_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svint64x2_t, z28,
+ svuzpq_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint64x4_t, z0,
+ svuzpq_s64_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint64x4_t, z0,
+ svuzpq_s64_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint64x4_t, z4,
+ svuzpq_s64_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint64x4_t, z18,
+ svuzpq_s64_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint64x4_t, z23,
+ svuzpq_s64_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint64x4_t, z28,
+ svuzpq_s64_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint8x2_t, z0,
+ svuzpq_s8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint8x2_t, z0,
+ svuzpq_s8_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint8x2_t, z4,
+ svuzpq_s8_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint8x2_t, z18,
+ svuzpq_s8_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint8x2_t, z23,
+ svuzpq_s8_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint8x2_t, z28,
+ svuzpq_s8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svint8x2_t, z28,
+ svuzpq_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svint8x2_t, z28,
+ svuzpq_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svint8x4_t, z0,
+ svuzpq_s8_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svint8x4_t, z0,
+ svuzpq_s8_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svint8x4_t, z4,
+ svuzpq_s8_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svint8x4_t, z18,
+ svuzpq_s8_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svint8x4_t, z23,
+ svuzpq_s8_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svint8x4_t, z28,
+ svuzpq_s8_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint16x2_t, z0,
+ svuzpq_u16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint16x2_t, z0,
+ svuzpq_u16_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint16x2_t, z4,
+ svuzpq_u16_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint16x2_t, z18,
+ svuzpq_u16_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint16x2_t, z23,
+ svuzpq_u16_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint16x2_t, z28,
+ svuzpq_u16_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svuint16x2_t, z28,
+ svuzpq_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svuint16x2_t, z28,
+ svuzpq_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint16x4_t, z0,
+ svuzpq_u16_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint16x4_t, z0,
+ svuzpq_u16_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint16x4_t, z4,
+ svuzpq_u16_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint16x4_t, z18,
+ svuzpq_u16_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint16x4_t, z23,
+ svuzpq_u16_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint16x4_t, z28,
+ svuzpq_u16_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint32x2_t, z0,
+ svuzpq_u32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint32x2_t, z0,
+ svuzpq_u32_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint32x2_t, z4,
+ svuzpq_u32_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint32x2_t, z18,
+ svuzpq_u32_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint32x2_t, z23,
+ svuzpq_u32_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint32x2_t, z28,
+ svuzpq_u32_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svuint32x2_t, z28,
+ svuzpq_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svuint32x2_t, z28,
+ svuzpq_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint32x4_t, z0,
+ svuzpq_u32_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint32x4_t, z0,
+ svuzpq_u32_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint32x4_t, z4,
+ svuzpq_u32_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint32x4_t, z18,
+ svuzpq_u32_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint32x4_t, z23,
+ svuzpq_u32_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint32x4_t, z28,
+ svuzpq_u32_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint64x2_t, z0,
+ svuzpq_u64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint64x2_t, z0,
+ svuzpq_u64_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint64x2_t, z4,
+ svuzpq_u64_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint64x2_t, z18,
+ svuzpq_u64_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint64x2_t, z23,
+ svuzpq_u64_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint64x2_t, z28,
+ svuzpq_u64_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svuint64x2_t, z28,
+ svuzpq_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svuint64x2_t, z28,
+ svuzpq_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint64x4_t, z0,
+ svuzpq_u64_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint64x4_t, z0,
+ svuzpq_u64_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint64x4_t, z4,
+ svuzpq_u64_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint64x4_t, z18,
+ svuzpq_u64_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint64x4_t, z23,
+ svuzpq_u64_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint64x4_t, z28,
+ svuzpq_u64_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint8x2_t, z0,
+ svuzpq_u8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint8x2_t, z0,
+ svuzpq_u8_x2 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** uzp {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint8x2_t, z4,
+ svuzpq_u8_x2 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** uzp {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint8x2_t, z18,
+ svuzpq_u8_x2 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint8x2_t, z23,
+ svuzpq_u8_x2 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint8x2_t, z28,
+ svuzpq_u8_x2 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z28_z0_z23:
+** uzp {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z0_z23, svuint8x2_t, z28,
+ svuzpq_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** uzpq_z28_z5_z19:
+** uzp {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (uzpq_z28_z5_z19, svuint8x2_t, z28,
+ svuzpq_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** uzpq_z0_z0:
+** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z0, svuint8x4_t, z0,
+ svuzpq_u8_x4 (z0),
+ svuzpq (z0))
+
+/*
+** uzpq_z0_z4:
+** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (uzpq_z0_z4, svuint8x4_t, z0,
+ svuzpq_u8_x4 (z4),
+ svuzpq (z4))
+
+/*
+** uzpq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z4_z18, svuint8x4_t, z4,
+ svuzpq_u8_x4 (z18),
+ svuzpq (z18))
+
+/*
+** uzpq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uzp {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z18_z23, svuint8x4_t, z18,
+ svuzpq_u8_x4 (z23),
+ svuzpq (z23))
+
+/*
+** uzpq_z23_z28:
+** uzp [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (uzpq_z23_z28, svuint8x4_t, z23,
+ svuzpq_u8_x4 (z28),
+ svuzpq (z28))
+
+/*
+** uzpq_z28_z0:
+** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (uzpq_z28_z0, svuint8x4_t, z28,
+ svuzpq_u8_x4 (z0),
+ svuzpq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** bfvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** bfvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** bfvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** bfvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** bfvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** bfvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t,
+ svvdot_lane_za32_bf16_vg1x2 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** fvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** fvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** fvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+ svvdot_lane_za32_f16_vg1x2 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** svdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** svdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** svdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** svdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** svdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** svdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint16x2_t, svint16_t,
+ svvdot_lane_za32_s16_vg1x2 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** svdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** svdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** svdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** svdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** svdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** svdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint8x4_t, svint8_t,
+ svvdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x2 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** uvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x2 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** uvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** uvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x2 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** uvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x2 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** uvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x2 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** uvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+ svvdot_lane_za32_u16_vg1x2 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x2 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (0, z0, z4, 0),
+ svvdot_lane_za32_vg1x4 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1),
+ svvdot_lane_za32_vg1x4 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_2:
+** uvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2),
+ svvdot_lane_za32_vg1x4 (w8, z28, z4, 2))
+
+/*
+** vdot_lane_w8p7_z0_z4_3:
+** uvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3),
+ svvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** uvdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2),
+ svvdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** uvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3),
+ svvdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uvdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0),
+ svvdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uvdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint8x4_t, svuint8_t,
+ svvdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1),
+ svvdot_lane_za32_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** svdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (0, z0, z4, 0),
+ svvdot_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** svdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w0, z0, z7, 1),
+ svvdot_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_0:
+** svdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_0, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8, z28, z4, 0),
+ svvdot_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** vdot_lane_w8p7_z0_z4_1:
+** svdot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_1, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8 + 7, z0, z4, 1),
+ svvdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** svdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** svdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** svdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8, z4, z15, 0),
+ svvdot_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** vdot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** svdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_1, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8, z28, z16, 1),
+ svvdot_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** svdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8, z17, z7, 0),
+ svvdot_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** svdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint16x4_t, svint16_t,
+ svvdot_lane_za64_s16_vg1x4 (w8, z22, z4, 1),
+ svvdot_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (0, z0, z4, 0),
+ svvdot_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w0, z0, z7, 1),
+ svvdot_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_0:
+** uvdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_0, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8, z28, z4, 0),
+ svvdot_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** vdot_lane_w8p7_z0_z4_1:
+** uvdot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_1, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8 + 7, z0, z4, 1),
+ svvdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8 + 8, z0, z4, 0),
+ svvdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w0 - 1, z0, z4, 1),
+ svvdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** uvdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8, z4, z15, 0),
+ svvdot_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** vdot_lane_w8_z28_z16_1:
+** mov (z[0-7]).d, z16.d
+** uvdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_1, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8, z28, z16, 1),
+ svvdot_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uvdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8, z17, z7, 0),
+ svvdot_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** uvdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+ svvdot_lane_za64_u16_vg1x4 (w8, z22, z4, 1),
+ svvdot_lane_za64_vg1x4 (w8, z22, z4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b16_s64_x2 (x0, x1),
+ p1 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b16_s64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b16_s64_x2 (x0, x1),
+ p9 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b16_s64_x2 (x0, x1),
+ p14 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b16_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b16_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b16_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b16_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b16_u64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b16_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b16_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b32_s64_x2 (x0, x1),
+ p1 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b32_s64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b32_s64_x2 (x0, x1),
+ p9 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b32_s64_x2 (x0, x1),
+ p14 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b32_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b32_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b32_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b32_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b32_u64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b32_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b32_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b64_s64_x2 (x0, x1),
+ p1 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b64_s64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b64_s64_x2 (x0, x1),
+ p9 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b64_s64_x2 (x0, x1),
+ p14 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b64_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b64_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b64_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b64_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b64_u64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b64_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b64_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b8_s64_x2 (x0, x1),
+ p1 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b8_s64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b8_s64_x2 (x0, x1),
+ p9 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b8_s64_x2 (x0, x1),
+ p14 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b8_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b8_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b8_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b8_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b8_u64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b8_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b8_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c16_s64 (x0, x1, 2),
+ pn0 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c16_s64 (x0, x1, 4),
+ pn7 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c16_s64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c16_s64 (x0, x1, 4),
+ pn15 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c16_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c16_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c16_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c16_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c16_u64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c16_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c16_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c32_s64 (x0, x1, 2),
+ pn0 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c32_s64 (x0, x1, 4),
+ pn7 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c32_s64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c32_s64 (x0, x1, 4),
+ pn15 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c32_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c32_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c32_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c32_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c32_u64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c32_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c32_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c64_s64 (x0, x1, 2),
+ pn0 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c64_s64 (x0, x1, 4),
+ pn7 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c64_s64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c64_s64 (x0, x1, 4),
+ pn15 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c64_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c64_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c64_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c64_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c64_u64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c64_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c64_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c8_s64 (x0, x1, 2),
+ pn0 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c8_s64 (x0, x1, 4),
+ pn7 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c8_s64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c8_s64 (x0, x1, 4),
+ pn15 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c8_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c8_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c8_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c8_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c8_u64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c8_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c8_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b16_s64_x2 (x0, x1),
+ p1 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b16_s64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b16_s64_x2 (x0, x1),
+ p9 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b16_s64_x2 (x0, x1),
+ p14 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b16_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b16_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b16_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b16_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b16_u64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b16_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b16_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b32_s64_x2 (x0, x1),
+ p1 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b32_s64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b32_s64_x2 (x0, x1),
+ p9 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b32_s64_x2 (x0, x1),
+ p14 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b32_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b32_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b32_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b32_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b32_u64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b32_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b32_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b64_s64_x2 (x0, x1),
+ p1 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b64_s64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b64_s64_x2 (x0, x1),
+ p9 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b64_s64_x2 (x0, x1),
+ p14 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b64_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b64_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b64_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b64_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b64_u64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b64_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b64_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b8_s64_x2 (x0, x1),
+ p1 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b8_s64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b8_s64_x2 (x0, x1),
+ p9 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b8_s64_x2 (x0, x1),
+ p14 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b8_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b8_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b8_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b8_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b8_u64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b8_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b8_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c16_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c16_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c16_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c16_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c16_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c16_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c32_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c32_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c32_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c32_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c32_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c32_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c64_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c64_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c64_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c64_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c64_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c64_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c8_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c8_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c8_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c8_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c8_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c8_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b16_s64_x2 (x0, x1),
+ p1 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b16_s64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b16_s64_x2 (x0, x1),
+ p9 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b16_s64_x2 (x0, x1),
+ p14 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b16_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b16_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b16_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b16_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b16_u64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b16_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b16_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b32_s64_x2 (x0, x1),
+ p1 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b32_s64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b32_s64_x2 (x0, x1),
+ p9 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b32_s64_x2 (x0, x1),
+ p14 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b32_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b32_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b32_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b32_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b32_u64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b32_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b32_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b64_s64_x2 (x0, x1),
+ p1 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b64_s64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b64_s64_x2 (x0, x1),
+ p9 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b64_s64_x2 (x0, x1),
+ p14 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b64_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b64_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b64_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b64_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b64_u64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b64_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b64_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b8_s64_x2 (x0, x1),
+ p1 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b8_s64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b8_s64_x2 (x0, x1),
+ p9 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b8_s64_x2 (x0, x1),
+ p14 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b8_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b8_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b8_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b8_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b8_u64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b8_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b8_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c16_s64 (x0, x1, 2),
+ pn0 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c16_s64 (x0, x1, 4),
+ pn7 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c16_s64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c16_s64 (x0, x1, 4),
+ pn15 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c16_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c16_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c16_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c16_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c16_u64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c16_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c16_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c32_s64 (x0, x1, 2),
+ pn0 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c32_s64 (x0, x1, 4),
+ pn7 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c32_s64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c32_s64 (x0, x1, 4),
+ pn15 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c32_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c32_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c32_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c32_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c32_u64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c32_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c32_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c64_s64 (x0, x1, 2),
+ pn0 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c64_s64 (x0, x1, 4),
+ pn7 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c64_s64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c64_s64 (x0, x1, 4),
+ pn15 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c64_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c64_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c64_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c64_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c64_u64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c64_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c64_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c8_s64 (x0, x1, 2),
+ pn0 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c8_s64 (x0, x1, 4),
+ pn7 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c8_s64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c8_s64 (x0, x1, 4),
+ pn15 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c8_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c8_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c8_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c8_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c8_u64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c8_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c8_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b16_s64_x2 (x0, x1),
+ p1 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b16_s64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b16_s64_x2 (x0, x1),
+ p9 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b16_s64_x2 (x0, x1),
+ p14 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b16_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b16_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b16_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b16_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b16_u64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b16_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b16_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b16_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b32_s64_x2 (x0, x1),
+ p1 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b32_s64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b32_s64_x2 (x0, x1),
+ p9 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b32_s64_x2 (x0, x1),
+ p14 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b32_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b32_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b32_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b32_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b32_u64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b32_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b32_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b32_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b64_s64_x2 (x0, x1),
+ p1 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b64_s64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b64_s64_x2 (x0, x1),
+ p9 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b64_s64_x2 (x0, x1),
+ p14 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b64_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b64_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b64_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b64_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b64_u64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b64_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b64_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b64_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b8_s64_x2 (x0, x1),
+ p1 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b8_s64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b8_s64_x2 (x0, x1),
+ p9 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b8_s64_x2 (x0, x1),
+ p14 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b8_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b8_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b8_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b8_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b8_u64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b8_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b8_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b8_u64_x2 (x0, 5))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c16_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c16_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c16_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c16_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c16_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c16_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c16_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c32_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c32_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c32_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c32_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c32_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c32_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c32_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c64_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c64_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c64_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c64_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c64_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c64_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c64_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c8_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c8_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c8_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c8_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c8_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c8_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c8_u64 (x0, 5, 4))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x2_t,
+ svwrite_hor_za16_s16_vg2 (0, 0, z0),
+ svwrite_hor_za16_s16_vg2 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (1, 1, z4),
+ svwrite_hor_za16_u16_vg2 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x2_t,
+ svwrite_hor_za16_f16_vg2 (0, w11, z28),
+ svwrite_hor_za16_f16_vg2 (0, w11, z28))
+
+/*
+** write_za16_bf16_z0_1_w12:
+** mova za1h\.h\[w12, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z0_1_w12, svbfloat16x2_t,
+ svwrite_hor_za16_bf16_vg2 (1, w12, z0),
+ svwrite_hor_za16_bf16_vg2 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mova za0h\.h\[w15, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (0, w15, z18),
+ svwrite_hor_za16_u16_vg2 (0, w15, z18))
+
+/*
+** write_za16_s16_z23_1_w12p6:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1h\.h\[w12, 6:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z23_1_w12p6, svint16x2_t,
+ svwrite_hor_za16_s16_vg2 (1, w12 + 6, z23),
+ svwrite_hor_za16_s16_vg2 (1, w12 + 6, z23))
+
+/*
+** write_za16_f16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z4_0_w12p1, svfloat16x2_t,
+ svwrite_hor_za16_f16_vg2 (0, w12 + 1, z4),
+ svwrite_hor_za16_f16_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** mova za1h\.h\[w12, 2:3\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x2_t,
+ svwrite_hor_za16_s16_vg2 (1, w12 + 2, z28),
+ svwrite_hor_za16_s16_vg2 (1, w12 + 2, z28))
+
+/*
+** write_za16_u16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z0_0_w15p3, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (0, w15 + 3, z0),
+ svwrite_hor_za16_u16_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za16_bf16_z4_1_w15p4:
+** mova za1h\.h\[w15, 4:5\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+ svwrite_hor_za16_bf16_vg2 (1, w15 + 4, z4),
+ svwrite_hor_za16_bf16_vg2 (1, w15 + 4, z4))
+
+/*
+** write_za16_u16_z28_0_w12p7:
+** add (w[0-9]+), w12, #?7
+** mova za0h\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w12p7, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (0, w12 + 7, z28),
+ svwrite_hor_za16_u16_vg2 (0, w12 + 7, z28))
+
+/*
+** write_za16_s16_z0_1_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za1h\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w15p8, svint16x2_t,
+ svwrite_hor_za16_s16_vg2 (1, w15 + 8, z0),
+ svwrite_hor_za16_s16_vg2 (1, w15 + 8, z0))
+
+/*
+** write_za16_u16_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12m1, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (0, w12 - 1, z4),
+ svwrite_hor_za16_u16_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za16_u16_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1h\.h\[\1, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_1_w16, svuint16x2_t,
+ svwrite_hor_za16_u16_vg2 (1, w16, z18),
+ svwrite_hor_za16_u16_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x4_t,
+ svwrite_hor_za16_s16_vg4 (0, 0, z0),
+ svwrite_hor_za16_s16_vg4 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x4_t,
+ svwrite_hor_za16_u16_vg4 (1, 1, z4),
+ svwrite_hor_za16_u16_vg4 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x4_t,
+ svwrite_hor_za16_f16_vg4 (0, w11, z28),
+ svwrite_hor_za16_f16_vg4 (0, w11, z28))
+
+/*
+** write_za16_s16_z0_1_w12:
+** mova za1h\.h\[w12, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w12, svint16x4_t,
+ svwrite_hor_za16_s16_vg4 (1, w12, z0),
+ svwrite_hor_za16_s16_vg4 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.h\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x4_t,
+ svwrite_hor_za16_u16_vg4 (0, w15, z18),
+ svwrite_hor_za16_u16_vg4 (0, w15, z18))
+
+/*
+** write_za16_bf16_z23_1_w12p4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1h\.h\[w12, 4:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+ svwrite_hor_za16_bf16_vg4 (1, w12 + 4, z23),
+ svwrite_hor_za16_bf16_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za16_u16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12p1, svuint16x4_t,
+ svwrite_hor_za16_u16_vg4 (0, w12 + 1, z4),
+ svwrite_hor_za16_u16_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za1h\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x4_t,
+ svwrite_hor_za16_s16_vg4 (1, w12 + 2, z28),
+ svwrite_hor_za16_s16_vg4 (1, w12 + 2, z28))
+
+/*
+** write_za16_f16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z0_0_w15p3, svfloat16x4_t,
+ svwrite_hor_za16_f16_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za16_f16_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za16_u16_z28_1_w12p6:
+** add (w[0-9]+), w12, #?6
+** mova za1h\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_1_w12p6, svuint16x4_t,
+ svwrite_hor_za16_u16_vg4 (1, w12 + 6, z28),
+ svwrite_hor_za16_u16_vg4 (1, w12 + 6, z28))
+
+/*
+** write_za16_s16_z0_0_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_w15p8, svint16x4_t,
+ svwrite_hor_za16_s16_vg4 (0, w15 + 8, z0),
+ svwrite_hor_za16_s16_vg4 (0, w15 + 8, z0))
+
+/*
+** write_za16_bf16_z4_1_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za1h\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+ svwrite_hor_za16_bf16_vg4 (1, w12 - 1, z4),
+ svwrite_hor_za16_bf16_vg4 (1, w12 - 1, z4))
+
+/*
+** write_za16_u16_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w16, svuint16x4_t,
+ svwrite_hor_za16_u16_vg4 (0, w16, z28),
+ svwrite_hor_za16_u16_vg4 (0, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x2_t,
+ svwrite_hor_za32_s32_vg2 (0, 0, z0),
+ svwrite_hor_za32_s32_vg2 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x2_t,
+ svwrite_hor_za32_u32_vg2 (1, 1, z4),
+ svwrite_hor_za32_u32_vg2 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.s\[\1, 0:1\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x2_t,
+ svwrite_hor_za32_f32_vg2 (2, w11, z28),
+ svwrite_hor_za32_f32_vg2 (2, w11, z28))
+
+/*
+** write_za32_f32_z0_3_w12:
+** mova za3h\.s\[w12, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_3_w12, svfloat32x2_t,
+ svwrite_hor_za32_f32_vg2 (3, w12, z0),
+ svwrite_hor_za32_f32_vg2 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mova za0h\.s\[w15, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x2_t,
+ svwrite_hor_za32_u32_vg2 (0, w15, z18),
+ svwrite_hor_za32_u32_vg2 (0, w15, z18))
+
+/*
+** write_za32_s32_z23_1_w12p2:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1h\.s\[w12, 2:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z23_1_w12p2, svint32x2_t,
+ svwrite_hor_za32_s32_vg2 (1, w12 + 2, z23),
+ svwrite_hor_za32_s32_vg2 (1, w12 + 2, z23))
+
+/*
+** write_za32_f32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2h\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12p1, svfloat32x2_t,
+ svwrite_hor_za32_f32_vg2 (2, w12 + 1, z4),
+ svwrite_hor_za32_f32_vg2 (2, w12 + 1, z4))
+
+/*
+** write_za32_u32_z0_3_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za3h\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z0_3_w15p3, svuint32x2_t,
+ svwrite_hor_za32_u32_vg2 (3, w15 + 3, z0),
+ svwrite_hor_za32_u32_vg2 (3, w15 + 3, z0))
+
+/*
+** write_za32_s32_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1h\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_1_w15p4, svint32x2_t,
+ svwrite_hor_za32_s32_vg2 (1, w15 + 4, z0),
+ svwrite_hor_za32_s32_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za32_u32_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3h\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_3_w12m1, svuint32x2_t,
+ svwrite_hor_za32_u32_vg2 (3, w12 - 1, z4),
+ svwrite_hor_za32_u32_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za32_u32_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1h\.s\[\1, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_1_w16, svuint32x2_t,
+ svwrite_hor_za32_u32_vg2 (1, w16, z18),
+ svwrite_hor_za32_u32_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x4_t,
+ svwrite_hor_za32_s32_vg4 (0, 0, z0),
+ svwrite_hor_za32_s32_vg4 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (1, 1, z4),
+ svwrite_hor_za32_u32_vg4 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x4_t,
+ svwrite_hor_za32_f32_vg4 (2, w11, z28),
+ svwrite_hor_za32_f32_vg4 (2, w11, z28))
+
+/*
+** write_za32_s32_z0_3_w12:
+** mova za3h\.s\[w12, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_3_w12, svint32x4_t,
+ svwrite_hor_za32_s32_vg4 (3, w12, z0),
+ svwrite_hor_za32_s32_vg4 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.s\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (0, w15, z18),
+ svwrite_hor_za32_u32_vg4 (0, w15, z18))
+
+/*
+** write_za32_f32_z23_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1h\.s\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z23_1_w12p4, svfloat32x4_t,
+ svwrite_hor_za32_f32_vg4 (1, w12 + 4, z23),
+ svwrite_hor_za32_f32_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za32_u32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2h\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_2_w12p1, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (2, w12 + 1, z4),
+ svwrite_hor_za32_u32_vg4 (2, w12 + 1, z4))
+
+/*
+** write_za32_s32_z28_3_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za3h\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z28_3_w12p2, svint32x4_t,
+ svwrite_hor_za32_s32_vg4 (3, w12 + 2, z28),
+ svwrite_hor_za32_s32_vg4 (3, w12 + 2, z28))
+
+/*
+** write_za32_f32_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_0_w15p3, svfloat32x4_t,
+ svwrite_hor_za32_f32_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za32_f32_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za32_u32_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1h\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_1_w12p4, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (1, w12 + 4, z28),
+ svwrite_hor_za32_u32_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za32_f32_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2h\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12m1, svfloat32x4_t,
+ svwrite_hor_za32_f32_vg4 (2, w12 - 1, z4),
+ svwrite_hor_za32_f32_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za32_u32_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3h\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_3_w16, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (3, w16, z28),
+ svwrite_hor_za32_u32_vg4 (3, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (0, 0, z0),
+ svwrite_hor_za64_s64_vg2 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (1, 1, z4),
+ svwrite_hor_za64_u64_vg2 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.d\[\1, 0:1\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (2, w11, z28),
+ svwrite_hor_za64_f64_vg2 (2, w11, z28))
+
+/*
+** write_za64_f64_z0_3_w12:
+** mova za3h\.d\[w12, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_3_w12, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (3, w12, z0),
+ svwrite_hor_za64_f64_vg2 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mova za4h\.d\[w15, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (4, w15, z18),
+ svwrite_hor_za64_u64_vg2 (4, w15, z18))
+
+/*
+** write_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mov [^\n]+
+** mov [^\n]+
+** mova za5h\.d\[\1, 0:1\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z23_5_w12p2, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (5, w12 + 2, z23),
+ svwrite_hor_za64_s64_vg2 (5, w12 + 2, z23))
+
+/*
+** write_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (6, w12 + 1, z4),
+ svwrite_hor_za64_f64_vg2 (6, w12 + 1, z4))
+
+/*
+** write_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za7h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z0_7_w15p3, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (7, w15 + 3, z0),
+ svwrite_hor_za64_u64_vg2 (7, w15 + 3, z0))
+
+/*
+** write_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_1_w15p4, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (1, w15 + 4, z0),
+ svwrite_hor_za64_s64_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_3_w12m1, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (3, w12 - 1, z4),
+ svwrite_hor_za64_u64_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1h\.d\[\1, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_1_w16, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (1, w16, z18),
+ svwrite_hor_za64_u64_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (0, 0, z0),
+ svwrite_hor_za64_s64_vg4 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (1, 1, z4),
+ svwrite_hor_za64_u64_vg4 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (2, w11, z28),
+ svwrite_hor_za64_f64_vg4 (2, w11, z28))
+
+/*
+** write_za64_s64_z0_3_w12:
+** mova za3h\.d\[w12, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_3_w12, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (3, w12, z0),
+ svwrite_hor_za64_s64_vg4 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za4h\.d\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (4, w15, z18),
+ svwrite_hor_za64_u64_vg4 (4, w15, z18))
+
+/*
+** write_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za5h\.d\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (5, w12 + 4, z23),
+ svwrite_hor_za64_f64_vg4 (5, w12 + 4, z23))
+
+/*
+** write_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_6_w12p1, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (6, w12 + 1, z4),
+ svwrite_hor_za64_u64_vg4 (6, w12 + 1, z4))
+
+/*
+** write_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za7h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z28_7_w12p2, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (7, w12 + 2, z28),
+ svwrite_hor_za64_s64_vg4 (7, w12 + 2, z28))
+
+/*
+** write_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za64_f64_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_1_w12p4, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (1, w12 + 4, z28),
+ svwrite_hor_za64_u64_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (2, w12 - 1, z4),
+ svwrite_hor_za64_f64_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_3_w16, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (3, w16, z28),
+ svwrite_hor_za64_u64_vg4 (3, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, 0, z0),
+ svwrite_hor_za8_s8_vg2 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w11, z28),
+ svwrite_hor_za8_s8_vg2 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0h\.b\[w12, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12, z0),
+ svwrite_hor_za8_s8_vg2 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mova za0h\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15, z18),
+ svwrite_hor_za8_u8_vg2 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p14:
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w12, 14:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p14, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12 + 14, z23),
+ svwrite_hor_za8_s8_vg2 (0, w12 + 14, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** mova za0h\.b\[w12, 2:3\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12 + 2, z28),
+ svwrite_hor_za8_s8_vg2 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0),
+ svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4),
+ svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28),
+ svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w15 + 16, z0),
+ svwrite_hor_za8_s8_vg2 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w16, z18),
+ svwrite_hor_za8_u8_vg2 (0, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, 0, z0),
+ svwrite_hor_za8_s8_vg4 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w11, z28),
+ svwrite_hor_za8_s8_vg4 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0h\.b\[w12, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12, z0),
+ svwrite_hor_za8_s8_vg4 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15, z18),
+ svwrite_hor_za8_u8_vg4 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w12, 12:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p12, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12 + 12, z23),
+ svwrite_hor_za8_s8_vg4 (0, w12 + 12, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12 + 2, z28),
+ svwrite_hor_za8_s8_vg4 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z0_0_w12p4:
+** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4),
+ svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w15 + 16, z0),
+ svwrite_hor_za8_s8_vg4 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w16, z28),
+ svwrite_hor_za8_u8_vg4 (0, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (0, 0, z0),
+ svwrite_ver_za16_s16_vg2 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (1, 1, z4),
+ svwrite_ver_za16_u16_vg2 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x2_t,
+ svwrite_ver_za16_f16_vg2 (0, w11, z28),
+ svwrite_ver_za16_f16_vg2 (0, w11, z28))
+
+/*
+** write_za16_bf16_z0_1_w12:
+** mova za1v\.h\[w12, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z0_1_w12, svbfloat16x2_t,
+ svwrite_ver_za16_bf16_vg2 (1, w12, z0),
+ svwrite_ver_za16_bf16_vg2 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mova za0v\.h\[w15, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w15, z18),
+ svwrite_ver_za16_u16_vg2 (0, w15, z18))
+
+/*
+** write_za16_s16_z23_1_w12p6:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.h\[w12, 6:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z23_1_w12p6, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w12 + 6, z23),
+ svwrite_ver_za16_s16_vg2 (1, w12 + 6, z23))
+
+/*
+** write_za16_f16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z4_0_w12p1, svfloat16x2_t,
+ svwrite_ver_za16_f16_vg2 (0, w12 + 1, z4),
+ svwrite_ver_za16_f16_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** mova za1v\.h\[w12, 2:3\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w12 + 2, z28),
+ svwrite_ver_za16_s16_vg2 (1, w12 + 2, z28))
+
+/*
+** write_za16_u16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z0_0_w15p3, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w15 + 3, z0),
+ svwrite_ver_za16_u16_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za16_bf16_z4_1_w15p4:
+** mova za1v\.h\[w15, 4:5\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+ svwrite_ver_za16_bf16_vg2 (1, w15 + 4, z4),
+ svwrite_ver_za16_bf16_vg2 (1, w15 + 4, z4))
+
+/*
+** write_za16_u16_z28_0_w12p7:
+** add (w[0-9]+), w12, #?7
+** mova za0v\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w12p7, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w12 + 7, z28),
+ svwrite_ver_za16_u16_vg2 (0, w12 + 7, z28))
+
+/*
+** write_za16_s16_z0_1_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za1v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w15p8, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w15 + 8, z0),
+ svwrite_ver_za16_s16_vg2 (1, w15 + 8, z0))
+
+/*
+** write_za16_u16_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12m1, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w12 - 1, z4),
+ svwrite_ver_za16_u16_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za16_u16_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.h\[\1, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_1_w16, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (1, w16, z18),
+ svwrite_ver_za16_u16_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (0, 0, z0),
+ svwrite_ver_za16_s16_vg4 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (1, 1, z4),
+ svwrite_ver_za16_u16_vg4 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x4_t,
+ svwrite_ver_za16_f16_vg4 (0, w11, z28),
+ svwrite_ver_za16_f16_vg4 (0, w11, z28))
+
+/*
+** write_za16_s16_z0_1_w12:
+** mova za1v\.h\[w12, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w12, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (1, w12, z0),
+ svwrite_ver_za16_s16_vg4 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.h\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w15, z18),
+ svwrite_ver_za16_u16_vg4 (0, w15, z18))
+
+/*
+** write_za16_bf16_z23_1_w12p4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.h\[w12, 4:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+ svwrite_ver_za16_bf16_vg4 (1, w12 + 4, z23),
+ svwrite_ver_za16_bf16_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za16_u16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12p1, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w12 + 1, z4),
+ svwrite_ver_za16_u16_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za1v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (1, w12 + 2, z28),
+ svwrite_ver_za16_s16_vg4 (1, w12 + 2, z28))
+
+/*
+** write_za16_f16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z0_0_w15p3, svfloat16x4_t,
+ svwrite_ver_za16_f16_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za16_f16_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za16_u16_z28_1_w12p6:
+** add (w[0-9]+), w12, #?6
+** mova za1v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_1_w12p6, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (1, w12 + 6, z28),
+ svwrite_ver_za16_u16_vg4 (1, w12 + 6, z28))
+
+/*
+** write_za16_s16_z0_0_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_w15p8, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (0, w15 + 8, z0),
+ svwrite_ver_za16_s16_vg4 (0, w15 + 8, z0))
+
+/*
+** write_za16_bf16_z4_1_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za1v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+ svwrite_ver_za16_bf16_vg4 (1, w12 - 1, z4),
+ svwrite_ver_za16_bf16_vg4 (1, w12 - 1, z4))
+
+/*
+** write_za16_u16_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w16, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w16, z28),
+ svwrite_ver_za16_u16_vg4 (0, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (0, 0, z0),
+ svwrite_ver_za32_s32_vg2 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (1, 1, z4),
+ svwrite_ver_za32_u32_vg2 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.s\[\1, 0:1\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (2, w11, z28),
+ svwrite_ver_za32_f32_vg2 (2, w11, z28))
+
+/*
+** write_za32_f32_z0_3_w12:
+** mova za3v\.s\[w12, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_3_w12, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (3, w12, z0),
+ svwrite_ver_za32_f32_vg2 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mova za0v\.s\[w15, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (0, w15, z18),
+ svwrite_ver_za32_u32_vg2 (0, w15, z18))
+
+/*
+** write_za32_s32_z23_1_w12p2:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.s\[w12, 2:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z23_1_w12p2, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (1, w12 + 2, z23),
+ svwrite_ver_za32_s32_vg2 (1, w12 + 2, z23))
+
+/*
+** write_za32_f32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12p1, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (2, w12 + 1, z4),
+ svwrite_ver_za32_f32_vg2 (2, w12 + 1, z4))
+
+/*
+** write_za32_u32_z0_3_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za3v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z0_3_w15p3, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (3, w15 + 3, z0),
+ svwrite_ver_za32_u32_vg2 (3, w15 + 3, z0))
+
+/*
+** write_za32_s32_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_1_w15p4, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (1, w15 + 4, z0),
+ svwrite_ver_za32_s32_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za32_u32_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_3_w12m1, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (3, w12 - 1, z4),
+ svwrite_ver_za32_u32_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za32_u32_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.s\[\1, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_1_w16, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (1, w16, z18),
+ svwrite_ver_za32_u32_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (0, 0, z0),
+ svwrite_ver_za32_s32_vg4 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (1, 1, z4),
+ svwrite_ver_za32_u32_vg4 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (2, w11, z28),
+ svwrite_ver_za32_f32_vg4 (2, w11, z28))
+
+/*
+** write_za32_s32_z0_3_w12:
+** mova za3v\.s\[w12, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_3_w12, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (3, w12, z0),
+ svwrite_ver_za32_s32_vg4 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.s\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (0, w15, z18),
+ svwrite_ver_za32_u32_vg4 (0, w15, z18))
+
+/*
+** write_za32_f32_z23_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.s\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z23_1_w12p4, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (1, w12 + 4, z23),
+ svwrite_ver_za32_f32_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za32_u32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_2_w12p1, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (2, w12 + 1, z4),
+ svwrite_ver_za32_u32_vg4 (2, w12 + 1, z4))
+
+/*
+** write_za32_s32_z28_3_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za3v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z28_3_w12p2, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (3, w12 + 2, z28),
+ svwrite_ver_za32_s32_vg4 (3, w12 + 2, z28))
+
+/*
+** write_za32_f32_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_0_w15p3, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za32_f32_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za32_u32_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_1_w12p4, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (1, w12 + 4, z28),
+ svwrite_ver_za32_u32_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za32_f32_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12m1, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (2, w12 - 1, z4),
+ svwrite_ver_za32_f32_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za32_u32_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_3_w16, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (3, w16, z28),
+ svwrite_ver_za32_u32_vg4 (3, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (0, 0, z0),
+ svwrite_ver_za64_s64_vg2 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (1, 1, z4),
+ svwrite_ver_za64_u64_vg2 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.d\[\1, 0:1\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (2, w11, z28),
+ svwrite_ver_za64_f64_vg2 (2, w11, z28))
+
+/*
+** write_za64_f64_z0_3_w12:
+** mova za3v\.d\[w12, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_3_w12, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (3, w12, z0),
+ svwrite_ver_za64_f64_vg2 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mova za4v\.d\[w15, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (4, w15, z18),
+ svwrite_ver_za64_u64_vg2 (4, w15, z18))
+
+/*
+** write_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mov [^\n]+
+** mov [^\n]+
+** mova za5v\.d\[\1, 0:1\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z23_5_w12p2, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (5, w12 + 2, z23),
+ svwrite_ver_za64_s64_vg2 (5, w12 + 2, z23))
+
+/*
+** write_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (6, w12 + 1, z4),
+ svwrite_ver_za64_f64_vg2 (6, w12 + 1, z4))
+
+/*
+** write_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za7v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z0_7_w15p3, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (7, w15 + 3, z0),
+ svwrite_ver_za64_u64_vg2 (7, w15 + 3, z0))
+
+/*
+** write_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_1_w15p4, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (1, w15 + 4, z0),
+ svwrite_ver_za64_s64_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_3_w12m1, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (3, w12 - 1, z4),
+ svwrite_ver_za64_u64_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.d\[\1, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_1_w16, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (1, w16, z18),
+ svwrite_ver_za64_u64_vg2 (1, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (0, 0, z0),
+ svwrite_ver_za64_s64_vg4 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (1, 1, z4),
+ svwrite_ver_za64_u64_vg4 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (2, w11, z28),
+ svwrite_ver_za64_f64_vg4 (2, w11, z28))
+
+/*
+** write_za64_s64_z0_3_w12:
+** mova za3v\.d\[w12, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_3_w12, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (3, w12, z0),
+ svwrite_ver_za64_s64_vg4 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za4v\.d\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (4, w15, z18),
+ svwrite_ver_za64_u64_vg4 (4, w15, z18))
+
+/*
+** write_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za5v\.d\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (5, w12 + 4, z23),
+ svwrite_ver_za64_f64_vg4 (5, w12 + 4, z23))
+
+/*
+** write_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_6_w12p1, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (6, w12 + 1, z4),
+ svwrite_ver_za64_u64_vg4 (6, w12 + 1, z4))
+
+/*
+** write_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za7v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z28_7_w12p2, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (7, w12 + 2, z28),
+ svwrite_ver_za64_s64_vg4 (7, w12 + 2, z28))
+
+/*
+** write_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za64_f64_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_1_w12p4, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (1, w12 + 4, z28),
+ svwrite_ver_za64_u64_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (2, w12 - 1, z4),
+ svwrite_ver_za64_f64_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_3_w16, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (3, w16, z28),
+ svwrite_ver_za64_u64_vg4 (3, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, 0, z0),
+ svwrite_ver_za8_s8_vg2 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w11, z28),
+ svwrite_ver_za8_s8_vg2 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0v\.b\[w12, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12, z0),
+ svwrite_ver_za8_s8_vg2 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mova za0v\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15, z18),
+ svwrite_ver_za8_u8_vg2 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p14:
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w12, 14:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p14, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12 + 14, z23),
+ svwrite_ver_za8_s8_vg2 (0, w12 + 14, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** mova za0v\.b\[w12, 2:3\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12 + 2, z28),
+ svwrite_ver_za8_s8_vg2 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0),
+ svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4),
+ svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28),
+ svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w15 + 16, z0),
+ svwrite_ver_za8_s8_vg2 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w16, z18),
+ svwrite_ver_za8_u8_vg2 (0, w16, z18))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, 0, z0),
+ svwrite_ver_za8_s8_vg4 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w11, z28),
+ svwrite_ver_za8_s8_vg4 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0v\.b\[w12, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12, z0),
+ svwrite_ver_za8_s8_vg4 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15, z18),
+ svwrite_ver_za8_u8_vg4 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w12, 12:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p12, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12 + 12, z23),
+ svwrite_ver_za8_s8_vg4 (0, w12 + 12, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12 + 2, z28),
+ svwrite_ver_za8_s8_vg4 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z0_0_w12p4:
+** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4),
+ svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w15 + 16, z0),
+ svwrite_ver_za8_s8_vg4 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w16, z28),
+ svwrite_ver_za8_u8_vg4 (0, w16, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (0, z0),
+ svwrite_za16_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w0, z0),
+ svwrite_za16_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w7, z0),
+ svwrite_za16_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svbfloat16x2_t,
+ svwrite_za16_bf16_vg1x2 (w8, z0),
+ svwrite_za16_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w11, z0),
+ svwrite_za16_vg1x2 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w12, z0),
+ svwrite_za16_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (w8 + 7, z0),
+ svwrite_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w8 + 8, z0),
+ svwrite_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w8 - 1, z0),
+ svwrite_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (w8, z18),
+ svwrite_za16_vg1x2 (w8, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w8, z23),
+ svwrite_za16_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svbfloat16x2_t,
+ svwrite_za16_bf16_vg1x2 (w8, z28),
+ svwrite_za16_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (0, z0),
+ svwrite_za16_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w0, z0),
+ svwrite_za16_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svfloat16x4_t,
+ svwrite_za16_f16_vg1x4 (w7, z0),
+ svwrite_za16_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z0),
+ svwrite_za16_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w11, z0),
+ svwrite_za16_vg1x4 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svbfloat16x4_t,
+ svwrite_za16_bf16_vg1x4 (w12, z0),
+ svwrite_za16_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8 + 7, z0),
+ svwrite_za16_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w8 + 8, z0),
+ svwrite_za16_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svfloat16x4_t,
+ svwrite_za16_f16_vg1x4 (w8 - 1, z0),
+ svwrite_za16_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z4),
+ svwrite_za16_vg1x4 (w8, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w8, z18),
+ svwrite_za16_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svbfloat16x4_t,
+ svwrite_za16_bf16_vg1x4 (w8, z23),
+ svwrite_za16_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z28),
+ svwrite_za16_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (0, z0),
+ svwrite_za32_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w0, z0),
+ svwrite_za32_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w7, z0),
+ svwrite_za32_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8, z0),
+ svwrite_za32_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w11, z0),
+ svwrite_za32_vg1x2 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w12, z0),
+ svwrite_za32_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8 + 7, z0),
+ svwrite_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w8 + 8, z0),
+ svwrite_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w8 - 1, z0),
+ svwrite_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8, z18),
+ svwrite_za32_vg1x2 (w8, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w8, z23),
+ svwrite_za32_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w8, z28),
+ svwrite_za32_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (0, z0),
+ svwrite_za32_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w0, z0),
+ svwrite_za32_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w7, z0),
+ svwrite_za32_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z0),
+ svwrite_za32_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w11, z0),
+ svwrite_za32_vg1x4 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w12, z0),
+ svwrite_za32_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8 + 7, z0),
+ svwrite_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w8 + 8, z0),
+ svwrite_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w8 - 1, z0),
+ svwrite_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z4),
+ svwrite_za32_vg1x4 (w8, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w8, z18),
+ svwrite_za32_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w8, z23),
+ svwrite_za32_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z28),
+ svwrite_za32_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (0, z0),
+ svwrite_za64_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w0, z0),
+ svwrite_za64_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w7, z0),
+ svwrite_za64_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (w8, z0),
+ svwrite_za64_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w11, z0),
+ svwrite_za64_vg1x2 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w12, z0),
+ svwrite_za64_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (w8 + 7, z0),
+ svwrite_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w8 + 8, z0),
+ svwrite_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w8 - 1, z0),
+ svwrite_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (w8, z18),
+ svwrite_za64_vg1x2 (w8, z18))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w8, z23),
+ svwrite_za64_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w8, z28),
+ svwrite_za64_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint64x4_t,
+ svwrite_za64_s64_vg1x4 (0, z0),
+ svwrite_za64_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint64x4_t,
+ svwrite_za64_u64_vg1x4 (w0, z0),
+ svwrite_za64_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svfloat64x4_t,
+ svwrite_za64_f64_vg1x4 (w7, z0),
+ svwrite_za64_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint64x4_t,
+ svwrite_za64_s64_vg1x4 (w8, z0),
+ svwrite_za64_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint64x4_t,
+ svwrite_za64_u64_vg1x4 (w11, z0),
+ svwrite_za64_vg1x4 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svfloat64x4_t,
+ svwrite_za64_f64_vg1x4 (w12, z0),
+ svwrite_za64_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint64x4_t,
+ svwrite_za64_s64_vg1x4 (w8 + 7, z0),
+ svwrite_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint64x4_t,
+ svwrite_za64_u64_vg1x4 (w8 + 8, z0),
+ svwrite_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svfloat64x4_t,
+ svwrite_za64_f64_vg1x4 (w8 - 1, z0),
+ svwrite_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint64x4_t,
+ svwrite_za64_s64_vg1x4 (w8, z4),
+ svwrite_za64_vg1x4 (w8, z4))
+
+/* Leave it to the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint64x4_t,
+ svwrite_za64_u64_vg1x4 (w8, z18),
+ svwrite_za64_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svfloat64x4_t,
+ svwrite_za64_f64_vg1x4 (w8, z23),
+ svwrite_za64_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svint64x4_t,
+ svwrite_za64_s64_vg1x4 (w8, z28),
+ svwrite_za64_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (0, z0),
+ svwrite_za8_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w0, z0),
+ svwrite_za8_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint8x2_t,
+ svwrite_za8_u8_vg1x2 (w7, z0),
+ svwrite_za8_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w8, z0),
+ svwrite_za8_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w11, z0),
+ svwrite_za8_vg1x2 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint8x2_t,
+ svwrite_za8_u8_vg1x2 (w12, z0),
+ svwrite_za8_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w8 + 7, z0),
+ svwrite_za8_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w8 + 8, z0),
+ svwrite_za8_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint8x2_t,
+ svwrite_za8_u8_vg1x2 (w8 - 1, z0),
+ svwrite_za8_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint8x2_t,
+ svwrite_za8_u8_vg1x2 (w8, z18),
+ svwrite_za8_vg1x2 (w8, z18))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint8x2_t,
+ svwrite_za8_s8_vg1x2 (w8, z23),
+ svwrite_za8_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svuint8x2_t,
+ svwrite_za8_u8_vg1x2 (w8, z28),
+ svwrite_za8_vg1x2 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (0, z0),
+ svwrite_za8_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint8x4_t,
+ svwrite_za8_u8_vg1x4 (w0, z0),
+ svwrite_za8_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w7, z0),
+ svwrite_za8_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w8, z0),
+ svwrite_za8_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint8x4_t,
+ svwrite_za8_u8_vg1x4 (w11, z0),
+ svwrite_za8_vg1x4 (w11, z0))
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w12, z0),
+ svwrite_za8_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w8 + 7, z0),
+ svwrite_za8_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint8x4_t,
+ svwrite_za8_u8_vg1x4 (w8 + 8, z0),
+ svwrite_za8_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w8 - 1, z0),
+ svwrite_za8_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w8, z4),
+ svwrite_za8_vg1x4 (w8, z4))
+
+/* Leave the assembler to check correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint8x4_t,
+ svwrite_za8_u8_vg1x4 (w8, z18),
+ svwrite_za8_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svuint8x4_t,
+ svwrite_za8_u8_vg1x4 (w8, z23),
+ svwrite_za8_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svint8x4_t,
+ svwrite_za8_s8_vg1x4 (w8, z28),
+ svwrite_za8_vg1x4 (w8, z28))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define SHARED_ZT0
+#include "test_sme2_acle.h"
+
+/*
+** zero_zt0:
+** zero { zt0 }
+** ret
+*/
+PROTO (zero_zt0, void, ()) { svzero_zt (0); }
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z0_z0, svbfloat16x2_t, z0,
+ svzip_bf16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (zip_z0_z4, svbfloat16x2_t, z0,
+ svzip_bf16_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z4_z18, svbfloat16x2_t, z4,
+ svzip_bf16_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (zip_z18_z23, svbfloat16x2_t, z18,
+ svzip_bf16_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svbfloat16x2_t, z23,
+ svzip_bf16_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z28_z0, svbfloat16x2_t, z28,
+ svzip_bf16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svbfloat16x2_t, z28,
+ svzip_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svbfloat16x2_t, z28,
+ svzip_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z0_z0, svbfloat16x4_t, z0,
+ svzip_bf16_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (zip_z0_z4, svbfloat16x4_t, z0,
+ svzip_bf16_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svbfloat16x4_t, z4,
+ svzip_bf16_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svbfloat16x4_t, z18,
+ svzip_bf16_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svbfloat16x4_t, z23,
+ svzip_bf16_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z28_z0, svbfloat16x4_t, z28,
+ svzip_bf16_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat16x2_t, z0,
+ svzip_f16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat16x2_t, z0,
+ svzip_f16_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat16x2_t, z4,
+ svzip_f16_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat16x2_t, z18,
+ svzip_f16_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat16x2_t, z23,
+ svzip_f16_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat16x2_t, z28,
+ svzip_f16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svfloat16x2_t, z28,
+ svzip_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svfloat16x2_t, z28,
+ svzip_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat16x4_t, z0,
+ svzip_f16_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat16x4_t, z0,
+ svzip_f16_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat16x4_t, z4,
+ svzip_f16_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat16x4_t, z18,
+ svzip_f16_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat16x4_t, z23,
+ svzip_f16_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat16x4_t, z28,
+ svzip_f16_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat32x2_t, z0,
+ svzip_f32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat32x2_t, z0,
+ svzip_f32_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat32x2_t, z4,
+ svzip_f32_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat32x2_t, z18,
+ svzip_f32_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat32x2_t, z23,
+ svzip_f32_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat32x2_t, z28,
+ svzip_f32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svfloat32x2_t, z28,
+ svzip_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svfloat32x2_t, z28,
+ svzip_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat32x4_t, z0,
+ svzip_f32_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat32x4_t, z0,
+ svzip_f32_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat32x4_t, z4,
+ svzip_f32_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat32x4_t, z18,
+ svzip_f32_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat32x4_t, z23,
+ svzip_f32_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat32x4_t, z28,
+ svzip_f32_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat64x2_t, z0,
+ svzip_f64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat64x2_t, z0,
+ svzip_f64_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat64x2_t, z4,
+ svzip_f64_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat64x2_t, z18,
+ svzip_f64_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat64x2_t, z23,
+ svzip_f64_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat64x2_t, z28,
+ svzip_f64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svfloat64x2_t, z28,
+ svzip_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svfloat64x2_t, z28,
+ svzip_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z0_z0, svfloat64x4_t, z0,
+ svzip_f64_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (zip_z0_z4, svfloat64x4_t, z0,
+ svzip_f64_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svfloat64x4_t, z4,
+ svzip_f64_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svfloat64x4_t, z18,
+ svzip_f64_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svfloat64x4_t, z23,
+ svzip_f64_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z28_z0, svfloat64x4_t, z28,
+ svzip_f64_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z0_z0, svint16x2_t, z0,
+ svzip_s16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (zip_z0_z4, svint16x2_t, z0,
+ svzip_s16_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z4_z18, svint16x2_t, z4,
+ svzip_s16_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (zip_z18_z23, svint16x2_t, z18,
+ svzip_s16_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint16x2_t, z23,
+ svzip_s16_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z28_z0, svint16x2_t, z28,
+ svzip_s16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svint16x2_t, z28,
+ svzip_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svint16x2_t, z28,
+ svzip_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z0_z0, svint16x4_t, z0,
+ svzip_s16_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (zip_z0_z4, svint16x4_t, z0,
+ svzip_s16_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svint16x4_t, z4,
+ svzip_s16_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svint16x4_t, z18,
+ svzip_s16_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint16x4_t, z23,
+ svzip_s16_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z28_z0, svint16x4_t, z28,
+ svzip_s16_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z0_z0, svint32x2_t, z0,
+ svzip_s32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (zip_z0_z4, svint32x2_t, z0,
+ svzip_s32_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z4_z18, svint32x2_t, z4,
+ svzip_s32_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (zip_z18_z23, svint32x2_t, z18,
+ svzip_s32_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint32x2_t, z23,
+ svzip_s32_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z28_z0, svint32x2_t, z28,
+ svzip_s32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svint32x2_t, z28,
+ svzip_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svint32x2_t, z28,
+ svzip_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z0_z0, svint32x4_t, z0,
+ svzip_s32_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (zip_z0_z4, svint32x4_t, z0,
+ svzip_s32_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svint32x4_t, z4,
+ svzip_s32_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svint32x4_t, z18,
+ svzip_s32_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint32x4_t, z23,
+ svzip_s32_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z28_z0, svint32x4_t, z28,
+ svzip_s32_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z0_z0, svint64x2_t, z0,
+ svzip_s64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (zip_z0_z4, svint64x2_t, z0,
+ svzip_s64_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z4_z18, svint64x2_t, z4,
+ svzip_s64_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (zip_z18_z23, svint64x2_t, z18,
+ svzip_s64_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint64x2_t, z23,
+ svzip_s64_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z28_z0, svint64x2_t, z28,
+ svzip_s64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svint64x2_t, z28,
+ svzip_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svint64x2_t, z28,
+ svzip_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z0_z0, svint64x4_t, z0,
+ svzip_s64_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (zip_z0_z4, svint64x4_t, z0,
+ svzip_s64_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svint64x4_t, z4,
+ svzip_s64_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svint64x4_t, z18,
+ svzip_s64_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint64x4_t, z23,
+ svzip_s64_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z28_z0, svint64x4_t, z28,
+ svzip_s64_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z0_z0, svint8x2_t, z0,
+ svzip_s8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (zip_z0_z4, svint8x2_t, z0,
+ svzip_s8_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z4_z18, svint8x2_t, z4,
+ svzip_s8_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (zip_z18_z23, svint8x2_t, z18,
+ svzip_s8_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint8x2_t, z23,
+ svzip_s8_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z28_z0, svint8x2_t, z28,
+ svzip_s8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svint8x2_t, z28,
+ svzip_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svint8x2_t, z28,
+ svzip_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z0_z0, svint8x4_t, z0,
+ svzip_s8_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (zip_z0_z4, svint8x4_t, z0,
+ svzip_s8_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svint8x4_t, z4,
+ svzip_s8_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svint8x4_t, z18,
+ svzip_s8_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svint8x4_t, z23,
+ svzip_s8_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z28_z0, svint8x4_t, z28,
+ svzip_s8_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z1\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint16x2_t, z0,
+ svzip_u16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z1\.h}, z4\.h, z5\.h
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint16x2_t, z0,
+ svzip_u16_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.h - z5\.h}, z18\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint16x2_t, z4,
+ svzip_u16_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.h - z19\.h}, z23\.h, z24\.h
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint16x2_t, z18,
+ svzip_u16_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.h, z29\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint16x2_t, z23,
+ svzip_u16_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z29\.h}, z0\.h, z1\.h
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint16x2_t, z28,
+ svzip_u16_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.h - z29\.h}, z0\.h, z23\.h
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svuint16x2_t, z28,
+ svzip_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.h - z29\.h}, z5\.h, z19\.h
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svuint16x2_t, z28,
+ svzip_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.h - z3\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint16x4_t, z0,
+ svzip_u16_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint16x4_t, z0,
+ svzip_u16_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.h - z7\.h}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint16x4_t, z4,
+ svzip_u16_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint16x4_t, z18,
+ svzip_u16_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint16x4_t, z23,
+ svzip_u16_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint16x4_t, z28,
+ svzip_u16_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z1\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint32x2_t, z0,
+ svzip_u32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z1\.s}, z4\.s, z5\.s
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint32x2_t, z0,
+ svzip_u32_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.s - z5\.s}, z18\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint32x2_t, z4,
+ svzip_u32_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.s - z19\.s}, z23\.s, z24\.s
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint32x2_t, z18,
+ svzip_u32_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.s, z29\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint32x2_t, z23,
+ svzip_u32_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z29\.s}, z0\.s, z1\.s
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint32x2_t, z28,
+ svzip_u32_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.s - z29\.s}, z0\.s, z23\.s
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svuint32x2_t, z28,
+ svzip_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.s - z29\.s}, z5\.s, z19\.s
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svuint32x2_t, z28,
+ svzip_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.s - z3\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint32x4_t, z0,
+ svzip_u32_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint32x4_t, z0,
+ svzip_u32_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.s - z7\.s}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint32x4_t, z4,
+ svzip_u32_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint32x4_t, z18,
+ svzip_u32_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint32x4_t, z23,
+ svzip_u32_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint32x4_t, z28,
+ svzip_u32_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z1\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint64x2_t, z0,
+ svzip_u64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z1\.d}, z4\.d, z5\.d
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint64x2_t, z0,
+ svzip_u64_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.d - z5\.d}, z18\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint64x2_t, z4,
+ svzip_u64_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.d - z19\.d}, z23\.d, z24\.d
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint64x2_t, z18,
+ svzip_u64_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.d, z29\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint64x2_t, z23,
+ svzip_u64_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z29\.d}, z0\.d, z1\.d
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint64x2_t, z28,
+ svzip_u64_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.d - z29\.d}, z0\.d, z23\.d
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svuint64x2_t, z28,
+ svzip_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.d - z29\.d}, z5\.d, z19\.d
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svuint64x2_t, z28,
+ svzip_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.d - z3\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint64x4_t, z0,
+ svzip_u64_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint64x4_t, z0,
+ svzip_u64_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.d - z7\.d}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint64x4_t, z4,
+ svzip_u64_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint64x4_t, z18,
+ svzip_u64_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint64x4_t, z23,
+ svzip_u64_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint64x4_t, z28,
+ svzip_u64_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z1\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint8x2_t, z0,
+ svzip_u8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z1\.b}, z4\.b, z5\.b
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint8x2_t, z0,
+ svzip_u8_x2 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** zip {z4\.b - z5\.b}, z18\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint8x2_t, z4,
+ svzip_u8_x2 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** zip {z18\.b - z19\.b}, z23\.b, z24\.b
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint8x2_t, z18,
+ svzip_u8_x2 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, z28\.b, z29\.b
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint8x2_t, z23,
+ svzip_u8_x2 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z29\.b}, z0\.b, z1\.b
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint8x2_t, z28,
+ svzip_u8_x2 (z0),
+ svzip (z0))
+
+/*
+** zip_z28_z0_z23:
+** zip {z28\.b - z29\.b}, z0\.b, z23\.b
+** ret
+*/
+TEST_XN (zip_z28_z0_z23, svuint8x2_t, z28,
+ svzip_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zip_z28_z5_z19:
+** zip {z28\.b - z29\.b}, z5\.b, z19\.b
+** ret
+*/
+TEST_XN (zip_z28_z5_z19, svuint8x2_t, z28,
+ svzip_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zip_z0_z0:
+** zip {z0\.b - z3\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z0_z0, svuint8x4_t, z0,
+ svzip_u8_x4 (z0),
+ svzip (z0))
+
+/*
+** zip_z0_z4:
+** zip {z0\.b - z3\.b}, {z4\.b - z7\.b}
+** ret
+*/
+TEST_XN (zip_z0_z4, svuint8x4_t, z0,
+ svzip_u8_x4 (z4),
+ svzip (z4))
+
+/*
+** zip_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.b - z7\.b}, [^\n]+
+** ret
+*/
+TEST_XN (zip_z4_z18, svuint8x4_t, z4,
+ svzip_u8_x4 (z18),
+ svzip (z18))
+
+/*
+** zip_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z18_z23, svuint8x4_t, z18,
+ svzip_u8_x4 (z23),
+ svzip (z23))
+
+/*
+** zip_z23_z28:
+** zip [^\n]+, {z28\.b - z31\.b}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zip_z23_z28, svuint8x4_t, z23,
+ svzip_u8_x4 (z28),
+ svzip (z28))
+
+/*
+** zip_z28_z0:
+** zip {z28\.b - z31\.b}, {z0\.b - z3\.b}
+** ret
+*/
+TEST_XN (zip_z28_z0, svuint8x4_t, z28,
+ svzip_u8_x4 (z0),
+ svzip (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svbfloat16x2_t, z0,
+ svzipq_bf16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svbfloat16x2_t, z0,
+ svzipq_bf16_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svbfloat16x2_t, z4,
+ svzipq_bf16_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svbfloat16x2_t, z18,
+ svzipq_bf16_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svbfloat16x2_t, z23,
+ svzipq_bf16_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svbfloat16x2_t, z28,
+ svzipq_bf16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svbfloat16x2_t, z28,
+ svzipq_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svbfloat16x2_t, z28,
+ svzipq_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svbfloat16x4_t, z0,
+ svzipq_bf16_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svbfloat16x4_t, z0,
+ svzipq_bf16_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svbfloat16x4_t, z4,
+ svzipq_bf16_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svbfloat16x4_t, z18,
+ svzipq_bf16_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svbfloat16x4_t, z23,
+ svzipq_bf16_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svbfloat16x4_t, z28,
+ svzipq_bf16_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat16x2_t, z0,
+ svzipq_f16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat16x2_t, z0,
+ svzipq_f16_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat16x2_t, z4,
+ svzipq_f16_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat16x2_t, z18,
+ svzipq_f16_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat16x2_t, z23,
+ svzipq_f16_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat16x2_t, z28,
+ svzipq_f16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svfloat16x2_t, z28,
+ svzipq_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svfloat16x2_t, z28,
+ svzipq_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat16x4_t, z0,
+ svzipq_f16_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat16x4_t, z0,
+ svzipq_f16_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat16x4_t, z4,
+ svzipq_f16_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat16x4_t, z18,
+ svzipq_f16_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat16x4_t, z23,
+ svzipq_f16_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat16x4_t, z28,
+ svzipq_f16_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat32x2_t, z0,
+ svzipq_f32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat32x2_t, z0,
+ svzipq_f32_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat32x2_t, z4,
+ svzipq_f32_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat32x2_t, z18,
+ svzipq_f32_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat32x2_t, z23,
+ svzipq_f32_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat32x2_t, z28,
+ svzipq_f32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svfloat32x2_t, z28,
+ svzipq_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svfloat32x2_t, z28,
+ svzipq_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat32x4_t, z0,
+ svzipq_f32_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat32x4_t, z0,
+ svzipq_f32_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat32x4_t, z4,
+ svzipq_f32_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat32x4_t, z18,
+ svzipq_f32_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat32x4_t, z23,
+ svzipq_f32_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat32x4_t, z28,
+ svzipq_f32_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat64x2_t, z0,
+ svzipq_f64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat64x2_t, z0,
+ svzipq_f64_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat64x2_t, z4,
+ svzipq_f64_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat64x2_t, z18,
+ svzipq_f64_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat64x2_t, z23,
+ svzipq_f64_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat64x2_t, z28,
+ svzipq_f64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svfloat64x2_t, z28,
+ svzipq_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svfloat64x2_t, z28,
+ svzipq_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svfloat64x4_t, z0,
+ svzipq_f64_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svfloat64x4_t, z0,
+ svzipq_f64_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svfloat64x4_t, z4,
+ svzipq_f64_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svfloat64x4_t, z18,
+ svzipq_f64_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svfloat64x4_t, z23,
+ svzipq_f64_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svfloat64x4_t, z28,
+ svzipq_f64_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint16x2_t, z0,
+ svzipq_s16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint16x2_t, z0,
+ svzipq_s16_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint16x2_t, z4,
+ svzipq_s16_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint16x2_t, z18,
+ svzipq_s16_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint16x2_t, z23,
+ svzipq_s16_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint16x2_t, z28,
+ svzipq_s16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svint16x2_t, z28,
+ svzipq_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svint16x2_t, z28,
+ svzipq_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint16x4_t, z0,
+ svzipq_s16_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint16x4_t, z0,
+ svzipq_s16_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint16x4_t, z4,
+ svzipq_s16_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint16x4_t, z18,
+ svzipq_s16_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint16x4_t, z23,
+ svzipq_s16_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint16x4_t, z28,
+ svzipq_s16_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint32x2_t, z0,
+ svzipq_s32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint32x2_t, z0,
+ svzipq_s32_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint32x2_t, z4,
+ svzipq_s32_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint32x2_t, z18,
+ svzipq_s32_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint32x2_t, z23,
+ svzipq_s32_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint32x2_t, z28,
+ svzipq_s32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svint32x2_t, z28,
+ svzipq_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svint32x2_t, z28,
+ svzipq_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint32x4_t, z0,
+ svzipq_s32_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint32x4_t, z0,
+ svzipq_s32_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint32x4_t, z4,
+ svzipq_s32_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint32x4_t, z18,
+ svzipq_s32_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint32x4_t, z23,
+ svzipq_s32_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint32x4_t, z28,
+ svzipq_s32_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint64x2_t, z0,
+ svzipq_s64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint64x2_t, z0,
+ svzipq_s64_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint64x2_t, z4,
+ svzipq_s64_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint64x2_t, z18,
+ svzipq_s64_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint64x2_t, z23,
+ svzipq_s64_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint64x2_t, z28,
+ svzipq_s64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svint64x2_t, z28,
+ svzipq_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svint64x2_t, z28,
+ svzipq_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint64x4_t, z0,
+ svzipq_s64_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint64x4_t, z0,
+ svzipq_s64_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint64x4_t, z4,
+ svzipq_s64_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint64x4_t, z18,
+ svzipq_s64_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint64x4_t, z23,
+ svzipq_s64_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint64x4_t, z28,
+ svzipq_s64_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint8x2_t, z0,
+ svzipq_s8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint8x2_t, z0,
+ svzipq_s8_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint8x2_t, z4,
+ svzipq_s8_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint8x2_t, z18,
+ svzipq_s8_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint8x2_t, z23,
+ svzipq_s8_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint8x2_t, z28,
+ svzipq_s8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svint8x2_t, z28,
+ svzipq_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svint8x2_t, z28,
+ svzipq_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svint8x4_t, z0,
+ svzipq_s8_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svint8x4_t, z0,
+ svzipq_s8_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svint8x4_t, z4,
+ svzipq_s8_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svint8x4_t, z18,
+ svzipq_s8_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svint8x4_t, z23,
+ svzipq_s8_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svint8x4_t, z28,
+ svzipq_s8_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint16x2_t, z0,
+ svzipq_u16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint16x2_t, z0,
+ svzipq_u16_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint16x2_t, z4,
+ svzipq_u16_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint16x2_t, z18,
+ svzipq_u16_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint16x2_t, z23,
+ svzipq_u16_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint16x2_t, z28,
+ svzipq_u16_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svuint16x2_t, z28,
+ svzipq_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svuint16x2_t, z28,
+ svzipq_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint16x4_t, z0,
+ svzipq_u16_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint16x4_t, z0,
+ svzipq_u16_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint16x4_t, z4,
+ svzipq_u16_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint16x4_t, z18,
+ svzipq_u16_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint16x4_t, z23,
+ svzipq_u16_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint16x4_t, z28,
+ svzipq_u16_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint32x2_t, z0,
+ svzipq_u32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint32x2_t, z0,
+ svzipq_u32_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint32x2_t, z4,
+ svzipq_u32_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint32x2_t, z18,
+ svzipq_u32_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint32x2_t, z23,
+ svzipq_u32_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint32x2_t, z28,
+ svzipq_u32_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svuint32x2_t, z28,
+ svzipq_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svuint32x2_t, z28,
+ svzipq_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint32x4_t, z0,
+ svzipq_u32_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint32x4_t, z0,
+ svzipq_u32_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint32x4_t, z4,
+ svzipq_u32_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint32x4_t, z18,
+ svzipq_u32_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint32x4_t, z23,
+ svzipq_u32_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint32x4_t, z28,
+ svzipq_u32_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint64x2_t, z0,
+ svzipq_u64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint64x2_t, z0,
+ svzipq_u64_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint64x2_t, z4,
+ svzipq_u64_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint64x2_t, z18,
+ svzipq_u64_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint64x2_t, z23,
+ svzipq_u64_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint64x2_t, z28,
+ svzipq_u64_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svuint64x2_t, z28,
+ svzipq_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svuint64x2_t, z28,
+ svzipq_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint64x4_t, z0,
+ svzipq_u64_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint64x4_t, z0,
+ svzipq_u64_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint64x4_t, z4,
+ svzipq_u64_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint64x4_t, z18,
+ svzipq_u64_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint64x4_t, z23,
+ svzipq_u64_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint64x4_t, z28,
+ svzipq_u64_x4 (z0),
+ svzipq (z0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z1\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint8x2_t, z0,
+ svzipq_u8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z1\.q}, z4\.q, z5\.q
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint8x2_t, z0,
+ svzipq_u8_x2 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** zip {z4\.q - z5\.q}, z18\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint8x2_t, z4,
+ svzipq_u8_x2 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** zip {z18\.q - z19\.q}, z23\.q, z24\.q
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint8x2_t, z18,
+ svzipq_u8_x2 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, z28\.q, z29\.q
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint8x2_t, z23,
+ svzipq_u8_x2 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z29\.q}, z0\.q, z1\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint8x2_t, z28,
+ svzipq_u8_x2 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z28_z0_z23:
+** zip {z28\.q - z29\.q}, z0\.q, z23\.q
+** ret
+*/
+TEST_XN (zipq_z28_z0_z23, svuint8x2_t, z28,
+ svzipq_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),
+ svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))
+
+/*
+** zipq_z28_z5_z19:
+** zip {z28\.q - z29\.q}, z5\.q, z19\.q
+** ret
+*/
+TEST_XN (zipq_z28_z5_z19, svuint8x2_t, z28,
+ svzipq_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),
+ svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** zipq_z0_z0:
+** zip {z0\.q - z3\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z0, svuint8x4_t, z0,
+ svzipq_u8_x4 (z0),
+ svzipq (z0))
+
+/*
+** zipq_z0_z4:
+** zip {z0\.q - z3\.q}, {z4\.q - z7\.q}
+** ret
+*/
+TEST_XN (zipq_z0_z4, svuint8x4_t, z0,
+ svzipq_u8_x4 (z4),
+ svzipq (z4))
+
+/*
+** zipq_z4_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z4\.q - z7\.q}, [^\n]+
+** ret
+*/
+TEST_XN (zipq_z4_z18, svuint8x4_t, z4,
+ svzipq_u8_x4 (z18),
+ svzipq (z18))
+
+/*
+** zipq_z18_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** zip {z[^\n]+}, {z[^\n]+}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z18_z23, svuint8x4_t, z18,
+ svzipq_u8_x4 (z23),
+ svzipq (z23))
+
+/*
+** zipq_z23_z28:
+** zip [^\n]+, {z28\.q - z31\.q}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (zipq_z23_z28, svuint8x4_t, z23,
+ svzipq_u8_x4 (z28),
+ svzipq (z28))
+
+/*
+** zipq_z28_z0:
+** zip {z28\.q - z31\.q}, {z0\.q - z3\.q}
+** ret
+*/
+TEST_XN (zipq_z28_z0, svuint8x4_t, z28,
+ svzipq_u8_x4 (z0),
+ svzipq (z0))
TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t,
z0 = svcreate2_f64 (z5, z4),
z0 = svcreate2 (z5, z4))
+
+/*
+** create2_b_0:
+** ret
+*/
+TEST_CREATE_B (create2_b_0, svboolx2_t,
+ p0_res = svcreate2_b (p0, p1),
+ p0_res = svcreate2 (p0, p1))
+
+/*
+** create2_b_1:
+** mov p0\.b, p2\.b
+** mov p1\.b, p3\.b
+** ret
+*/
+TEST_CREATE_B (create2_b_1, svboolx2_t,
+ p0_res = svcreate2_b (p2, p3),
+ p0_res = svcreate2 (p2, p3))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get2_b_p0_0:
+** mov p0\.b, p4\.b
+** ret
+*/
+TEST_GET_B (get2_b_p0_0, svboolx2_t,
+ p0 = svget2_b (p4, 0),
+ p0 = svget2 (p4, 0))
+
+/*
+** get2_b_p0_1:
+** mov p0\.b, p5\.b
+** ret
+*/
+TEST_GET_B (get2_b_p0_1, svboolx2_t,
+ p0 = svget2_b (p4, 1),
+ p0 = svget2 (p4, 1))
+
+/*
+** get2_b_p4_0:
+** ret
+*/
+TEST_GET_B (get2_b_p4_0, svboolx2_t,
+ p4_res = svget2_b (p4, 0),
+ p4_res = svget2 (p4, 0))
+
+/*
+** get2_b_p4_1:
+** mov p4\.b, p5\.b
+** ret
+*/
+TEST_GET_B (get2_b_p4_1, svboolx2_t,
+ p4_res = svget2_b (p4, 1),
+ p4_res = svget2 (p4, 1))
+
+/*
+** get2_b_p5_0:
+** mov p5\.b, p4\.b
+** ret
+*/
+TEST_GET_B (get2_b_p5_0, svboolx2_t,
+ p5_res = svget2_b (p4, 0),
+ p5_res = svget2 (p4, 0))
+
+/*
+** get2_b_p5_1:
+** ret
+*/
+TEST_GET_B (get2_b_p5_1, svboolx2_t,
+ p5_res = svget2_b (p4, 1),
+ p5_res = svget2 (p4, 1))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set2_b_p8_0:
+** mov p9\.b, p5\.b
+** mov p8\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set2_b_p8_0, svboolx2_t,
+ p8 = svset2_b (p4, 0, p0),
+ p8 = svset2 (p4, 0, p0))
+
+/*
+** set2_b_p8_1:
+** mov p8\.b, p4\.b
+** mov p9\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set2_b_p8_1, svboolx2_t,
+ p8 = svset2_b (p4, 1, p0),
+ p8 = svset2 (p4, 1, p0))
+
+/*
+** set2_b_p4_0:
+** mov p4\.b, p12\.b
+** ret
+*/
+TEST_SET_B (set2_b_p4_0, svboolx2_t,
+ p4 = svset2_b (p4, 0, p12),
+ p4 = svset2 (p4, 0, p12))
+
+/*
+** set2_b_p4_1:
+** mov p5\.b, p13\.b
+** ret
+*/
+TEST_SET_B (set2_b_p4_1, svboolx2_t,
+ p4 = svset2_b (p4, 1, p13),
+ p4 = svset2 (p4, 1, p13))
#define ZA_ATTR
#endif
-#define ATTR SM_ATTR ZA_ATTR
+#ifdef SHARED_ZT0
+#define ZT0_ATTR __arm_inout("zt0")
+#else
+#define ZT0_ATTR
+#endif
+
+#define ATTR SM_ATTR ZA_ATTR ZT0_ATTR
#ifdef __cplusplus
#define PROTO(NAME, RET, ARGS) \
return z0; \
}
+#define TEST_LOAD_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (const STYPE *x0, intptr_t x1)) \
+ { \
+ register svcount_t pn0 __asm ("pn0"); \
+ register svcount_t pn7 __asm ("pn7"); \
+ register svcount_t pn8 __asm ("pn8"); \
+ register svcount_t pn15 __asm ("pn15"); \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z17 __asm ("z17"); \
+ register TTYPE z22 __asm ("z22"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \
+ "=Upa" (pn8), "=Upa" (pn15)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0), "w" (z17), \
+ "w" (z22), "w" (z28)); \
+ }
+
#define TEST_LOAD_GATHER_SZ(NAME, RES_TYPE, STYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \
const STYPE *x0)) \
INVOKE (CODE1, CODE2); \
}
+#define TEST_STORE_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (STYPE *x0, intptr_t x1)) \
+ { \
+ register svcount_t pn0 __asm ("pn0"); \
+ register svcount_t pn7 __asm ("pn7"); \
+ register svcount_t pn8 __asm ("pn8"); \
+ register svcount_t pn15 __asm ("pn15"); \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z17 __asm ("z17"); \
+ register TTYPE z22 __asm ("z22"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \
+ "=Upa" (pn8), "=Upa" (pn15), \
+ "=w" (z0), "=w" (z17), "=w" (z22), \
+ "=w" (z28)); \
+ INVOKE (CODE1, CODE2); \
+ }
+
#define TEST_STORE_SCATTER_SZ(NAME, DATA_TYPE, STYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \
STYPE *x0)) \
return x0; \
}
+#define TEST_PN(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register svcount_t pn0 __asm("pn0"); \
+ register svcount_t pn7 __asm("pn7"); \
+ register svcount_t pn8 __asm("pn8"); \
+ register svcount_t pn15 __asm("pn15"); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7), \
+ "Upa" (pn8), "Upa" (pn15)); \
+ }
+
+#define TEST_COUNT_PN(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register svcount_t pn0 __asm ("pn0"); \
+ register svcount_t pn7 __asm ("pn7"); \
+ register svcount_t pn8 __asm ("pn8"); \
+ register svcount_t pn15 __asm ("pn15"); \
+ register uint64_t x0 __asm ("x0"); \
+ register uint64_t x15 __asm ("x15"); \
+ register uint64_t x17 __asm ("x17"); \
+ __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \
+ "=Upa" (pn8), "=Upa" (pn15)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "r" (x0), "r" (x15), \
+ "r" (x17)); \
+ }
+
+#define TEST_EXTRACT_PN(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register svcount_t pn0 __asm ("pn0"); \
+ register TYPE p2 __asm ("p2"); \
+ register TYPE p5 __asm ("p5"); \
+ register svcount_t pn7 __asm ("pn7"); \
+ register svcount_t pn8 __asm ("pn8"); \
+ register TYPE p9 __asm ("p9"); \
+ register svcount_t pn11 __asm ("pn11"); \
+ register TYPE p12 __asm ("p12"); \
+ register svcount_t pn15 __asm ("pn15"); \
+ __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \
+ "=Upa" (pn8), "=Upa" (pn11), \
+ "=Upa" (pn15)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (p2), "Upa" (p5), \
+ "Upa" (p9), "Upa" (p12)); \
+ }
+
+#define TEST_SELECT_P(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register TYPE p0 __asm ("p0"); \
+ register TYPE p2 __asm ("p2"); \
+ register svbool_t p7 __asm ("p7"); \
+ register svbool_t p8 __asm ("p8"); \
+ register TYPE p13 __asm ("p13"); \
+ register svbool_t p15 __asm ("p15"); \
+ register int32_t w11 __asm ("w11"); \
+ register int32_t w12 __asm ("w12"); \
+ register int32_t w15 __asm ("w15"); \
+ register int32_t w16 __asm ("w16"); \
+ __asm volatile ("" : "=Upa" (p0), "=Upa" (p2), \
+ "=Upa" (p7), "=Upa" (p8), \
+ "=Upa" (p13), "=Upa" (p15), \
+ "=r" (w11), "=r" (w12), \
+ "=r" (w15), "=r" (w16)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (p0), "Upa" (p2), \
+ "Upa" (p7), "Upa" (p8), \
+ "Upa" (p13), "Upa" (p15)); \
+ }
+
#define TEST_COMPARE_S(NAME, TYPE, CODE1, CODE2) \
PROTO (NAME, svbool_t, (TYPE x0, TYPE x1)) \
{ \
return p0; \
}
+#define TEST_COMPARE_S_X2(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (TYPE x0, TYPE x1)) \
+ { \
+ register svboolx2_t p1 __asm("p1"); \
+ register svboolx2_t p4 __asm("p4"); \
+ register svboolx2_t p9 __asm("p9"); \
+ register svboolx2_t p14 __asm("p14"); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (p1), "Upa" (p4), \
+ "Upa" (p9), "Upa" (p14)); \
+ }
+
+#define TEST_COMPARE_S_C(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (TYPE x0, TYPE x1)) \
+ { \
+ register svcount_t pn0 __asm("pn0"); \
+ register svcount_t pn7 __asm("pn7"); \
+ register svcount_t pn8 __asm("pn8"); \
+ register svcount_t pn15 __asm("pn15"); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7), \
+ "Upa" (pn8), "Upa" (pn15)); \
+ }
+
#define TEST_COMPARE_Z(NAME, TYPE, CODE1, CODE2) \
PROTO (NAME, svbool_t, (TYPE z0, TYPE z1, \
svbool_t p0, svbool_t p1)) \
return z0; \
}
+#define TEST_CREATE_B(NAME, TTYPE, CODE1, CODE2) \
+ PROTO (NAME, TTYPE, (svbool_t p0, svbool_t p1, \
+ svbool_t p2, svbool_t p3)) \
+ { \
+ TTYPE p0_res; \
+ INVOKE (CODE1, CODE2); \
+ return p0_res; \
+ }
+
#define TEST_GET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1, \
ZTYPE unused2, ZTYPE unused3, TTYPE z4)) \
"w" (z6_res), "w" (z7_res)); \
}
+#define TEST_GET_B(NAME, TTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register svbool_t p0 __asm ("p0"); \
+ register TTYPE p4 __asm ("p4"); \
+ register svbool_t p4_res __asm ("p4"); \
+ register svbool_t p5_res __asm ("p5"); \
+ register svbool_t p6_res __asm ("p6"); \
+ register svbool_t p7_res __asm ("p7"); \
+ __asm volatile ("" : "=Upa" (p0), "=Upa" (p4)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (p0), "Upa" (p4_res), \
+ "Upa" (p5_res), "Upa" (p6_res), \
+ "Upa" (p7_res)); \
+ }
+
#define TEST_SET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \
TTYPE z4)) \
__asm volatile ("" :: "w" (z4), "w" (z24)); \
}
+#define TEST_SET_B(NAME, TTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register svbool_t p0 __asm ("p0"); \
+ register TTYPE p4 __asm ("p4"); \
+ register TTYPE p8 __asm ("p8"); \
+ register svbool_t p12 __asm ("p12"); \
+ register svbool_t p13 __asm ("p13"); \
+ __asm volatile ("" : "=Upa" (p0), "=Upa" (p4), \
+ "=Upa" (p12), "=Upa" (p13)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "Upa" (p4), "Upa" (p8)); \
+ }
+
#define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \
PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4)) \
{ \
return z0_res; \
}
+#define TEST_XN(NAME, TTYPE, RES, CODE1, CODE2) \
+ PROTO (NAME, void, ()) \
+ { \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z4 __asm ("z4"); \
+ register TTYPE z18 __asm ("z18"); \
+ register TTYPE z23 __asm ("z23"); \
+ register TTYPE z28 __asm ("z28"); \
+ register svcount_t pn0 __asm ("pn0"); \
+ register svcount_t pn7 __asm ("pn7"); \
+ register svcount_t pn8 __asm ("pn8"); \
+ register svcount_t pn15 __asm ("pn15"); \
+ __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z18), \
+ "=w" (z23), "=w" (z28), "=Upa" (pn0), \
+ "=Upa" (pn7), "=Upa" (pn8), "=Upa" (pn15)); \
+ INVOKE (RES = CODE1, RES = CODE2); \
+ __asm volatile ("" :: "w" (RES)); \
+ }
+
#define TEST_DUAL_XN(NAME, TTYPE1, TTYPE2, RES, CODE1, CODE2) \
PROTO (NAME, void, ()) \
{ \
__asm volatile ("" :: "w" (RES)); \
}
+#define TEST_XN_SINGLE(NAME, TTYPE, ZTYPE, RES, CODE1, CODE2) \
+ PROTO (NAME, void, ()) \
+ { \
+ register ZTYPE z0 __asm ("z0"); \
+ register TTYPE z1 __asm ("z1"); \
+ register ZTYPE z5 __asm ("z5"); \
+ register ZTYPE z7 __asm ("z7"); \
+ register ZTYPE z16 __asm ("z16"); \
+ register TTYPE z18 __asm ("z18"); \
+ register ZTYPE z23 __asm ("z23"); \
+ register TTYPE z24 __asm ("z24"); \
+ register TTYPE z28 __asm ("z28"); \
+ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (z5), \
+ "=w" (z7), "=w" (z16), "=w" (z18), \
+ "=w" (z23), "=w" (z24), "=w" (z28)); \
+ INVOKE (RES = CODE1, RES = CODE2); \
+ __asm volatile ("" :: "w" (RES)); \
+ }
+
+#define TEST_XN_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, TTYPE, (TTYPE z0)) \
+ { \
+ register ZTYPE z15 __asm ("z15"); \
+ __asm volatile ("" : "=w" (z15)); \
+ INVOKE (CODE1, CODE2); \
+ return z0; \
+ }
+
+#define TEST_XN_SINGLE_AWKWARD(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, TTYPE, (ZTYPE z0, TTYPE z1, ZTYPE zn)) \
+ { \
+ TTYPE z0_res; \
+ INVOKE (CODE1, CODE2); \
+ return z0_res; \
+ }
+
+#define TEST_X2_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, ()) \
+ { \
+ register TTYPE z0 __asm ("z0"); \
+ register ZTYPE z5 __asm ("z5"); \
+ register TTYPE z6 __asm ("z6"); \
+ register TTYPE z16 __asm ("z16"); \
+ register ZTYPE z22 __asm ("z22"); \
+ register TTYPE z29 __asm ("z29"); \
+ register ZTYPE z0_res __asm ("z0"); \
+ __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \
+ "=w" (z16), "=w" (z22), "=w" (z29)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \
+ }
+
+#define TEST_X4_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO (NAME, void, ()) \
+ { \
+ register TTYPE z0 __asm ("z0"); \
+ register TTYPE z4 __asm ("z4"); \
+ register TTYPE z16 __asm ("z16"); \
+ register TTYPE z21 __asm ("z21"); \
+ register ZTYPE z25 __asm ("z25"); \
+ register TTYPE z26 __asm ("z26"); \
+ register ZTYPE z0_res __asm ("z0"); \
+ register ZTYPE z22_res __asm ("z22"); \
+ __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z16), \
+ "=w" (z21), "=w" (z26)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0_res), "w" (z22_res), \
+ "w" (z25)); \
+ }
+
#endif
--- /dev/null
+/* { dg-do compile } */
+
+#pragma GCC target "+sve2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16,
+ svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2,
+ svuint32x2_t u32x2)
+{
+ svrshl_x (pg, s16); /* { dg-error {too few arguments to function 'svrshl_x'} } */
+ svrshl_x (pg, s16, s16, s16); /* { dg-error {too many arguments to function 'svrshl_x'} } */
+ svrshl_x (s32, s16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */
+ svrshl_x (1, s16, s32); /* { dg-error {passing 'int' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */
+ svrshl_x (pg, pg, s16); /* { dg-error {'svrshl_x' has no form that takes 'svbool_t' arguments} } */
+ svrshl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svrshl_x', which expects an SVE type rather than a scalar} } */
+ svrshl_x (pg, s16, s16);
+ svrshl_x (pg, s16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, s16, s32); /* { dg-error {arguments 2 and 3 of 'svrshl_x' must have the same element size, but the values passed here have type 'svint16_t' and 'svint32_t' respectively} } */
+ svrshl_x (pg, s16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, s16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, s16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, s16, 0);
+ svrshl_x (pg, f16, s16); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */
+ svrshl_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, f16, s32); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */
+ svrshl_x (pg, f16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+ svrshl_x (pg, u16, s16);
+
+ svrshl_x (pg, s32x2, s32x2); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */
+ svrshl_x (pg, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svrshl_x', which expects vectors of signed integers} } */
+ svrshl_x (pg, s32x2, s32); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+ svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+ svfloat32_t f32)
+ __arm_streaming
+{
+ svrshl (s16x2); /* { dg-error {too few arguments to function 'svrshl'} } */
+ svrshl (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svrshl'} } */
+ svrshl (pg, s16x2); /* { dg-error {'svrshl' has no form that takes 'svbool_t' arguments} } */
+ svrshl (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svrshl', which expects an SVE type rather than a scalar} } */
+ svrshl (s16, s16); /* { dg-error {'svrshl' has no form that takes 'svint16_t' arguments} } */
+ svrshl (s16x2, s16x2);
+ svrshl (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+ svrshl (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+ svrshl (s16x2, s32x2); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint16x2_t' and 'svint32x2_t' respectively} } */
+ svrshl (s32x2, s16); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint32x2_t' and 'svint16_t' respectively} } */
+ svrshl (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+ svrshl (s32x2, s32);
+ svrshl (s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+ svrshl (s32x2, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+ svrshl (s16x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+ svrshl (s16x2, f32x2); /* { dg-error {passing 'svfloat32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+ svrshl (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+ svrshl (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svrshl', which expects an SVE type rather than a scalar type} } */
+ svrshl (f16x2, s16x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */
+ svrshl (f16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+ svrshl (f16x2, s32x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */
+ svrshl (u16x2, s16x2);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#pragma GCC target "+sve2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16,
+ svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2,
+ svuint32x2_t u32x2)
+{
+ svqdmulh (s16); /* { dg-error {too few arguments to function 'svqdmulh'} } */
+ svqdmulh (s16, s16, s16); /* { dg-error {too many arguments to function 'svqdmulh'} } */
+ svqdmulh (pg, pg); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */
+ svqdmulh (1, s16); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */
+ svqdmulh (s16, s16);
+ svqdmulh (s16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+ svqdmulh (s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+ svqdmulh (s16, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+ svqdmulh (s32, s32x2); /* { dg-error {passing tuple 'svint32x2_t' to argument 2 of 'svqdmulh' after passing single vector 'svint32_t' to argument 1} } */
+ svqdmulh (s16, 0);
+ svqdmulh (f16, f16); /* { dg-error {'svqdmulh' has no form that takes 'svfloat16_t' arguments} } */
+ svqdmulh (u16, u16); /* { dg-error {'svqdmulh' has no form that takes 'svuint16_t' arguments} } */
+
+ svqdmulh (s32x2, s32x2); /* { dg-error {ACLE function 'svqdmulh_s32_x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+ svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint32x3_t s32x3, svint32x4_t s32x4,
+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+ svfloat32_t f32)
+ __arm_streaming
+{
+ svqdmulh (s16x2); /* { dg-error {too few arguments to function 'svqdmulh'} } */
+ svqdmulh (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svqdmulh'} } */
+ svqdmulh (pg, s16x2); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */
+ svqdmulh (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */
+ svqdmulh (s16, s16);
+ svqdmulh (s16x2, s16x2);
+ svqdmulh (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */
+ svqdmulh (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */
+ svqdmulh (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */
+ svqdmulh (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */
+ svqdmulh (s32x2, s32);
+ svqdmulh (s32x2, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */
+ svqdmulh (s32x2, s32x4); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x4_t' to arguments 1 and 2 of 'svqdmulh'} } */
+ svqdmulh (s32x3, s32x2); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */
+ svqdmulh (s32x3, s32x3); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */
+ svqdmulh (s32x4, s32x2); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x2_t' to arguments 1 and 2 of 'svqdmulh'} } */
+ svqdmulh (s32x4, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */
+ svqdmulh (s32x4, s32x4);
+ svqdmulh (u32x2, u32x2); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */
+ svqdmulh (u32x2, u32); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */
+
+ svqdmulh (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint16_t'} } */
+ svqdmulh (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svqdmulh', which expects an SVE type rather than a scalar type} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+ svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint32x3_t s32x3, svint32x4_t s32x4,
+ svint16_t s16, svuint16_t u16, svfloat16_t f16, svint32_t s32,
+ svuint32_t u32, svfloat32_t f32)
+ __arm_streaming
+{
+ svadd (s16x2); /* { dg-error {too few arguments to function 'svadd'} } */
+ svadd (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svadd'} } */
+ svadd (pg, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+ svadd (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svadd', which expects an SVE type rather than a scalar} } */
+ svadd (s16, s16); /* { dg-error {'svadd' has no form that takes 'svint16_t' arguments} } */
+ svadd (s16x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+ svadd (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+ svadd (s16x2, s16);
+ svadd (s16x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+ svadd (s16x2, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+ svadd (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+ svadd (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+ svadd (s32x2, s32);
+ svadd (s32x3, s32); /* { dg-error {'svadd' has no form that takes 'svint32x3_t' arguments} } */
+ svadd (s32x4, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+ svadd (f32x2, f32); /* { dg-error {'svadd' has no form that takes 'svfloat32x2_t' arguments} } */
+
+ svadd (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+ svadd (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svadd', which expects an SVE type rather than a scalar type} } */
+}
svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */
svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */
svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */
- svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */
svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */
svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint8_t s8, svuint8_t u8,
+ svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint8x3_t s8x3, svuint8x3_t u8x3,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint64x2_t s64x2, svuint64x2_t u64x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svusdot_za32_vg1x2 (1, u8x2); /* { dg-error {too few arguments to function 'svusdot_za32_vg1x2'} } */
+ svusdot_za32_vg1x2 (1, u8x2, s8x2, s8x2); /* { dg-error {too many arguments to function 'svusdot_za32_vg1x2'} } */
+
+ svusdot_za32_vg1x2 (s8x2, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+ svusdot_za32_vg1x2 (f, u8x2, s8x2);
+ svusdot_za32_vg1x2 (d, u8x2, s8x2);
+ svusdot_za32_vg1x2 (pg, u8x2, s8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svusdot_za32_vg1x2 (1, 1, s8x2); /* { dg-error {passing 'int' to argument 2 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svusdot_za32_vg1x2 (1, pg, s8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_za32_vg1x2 (1, s8, s8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_za32_vg1x2 (1, u8x3, s8x3); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_za32_vg1x2 (1, u8x4, s8x4); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svusdot_za32_vg1x2 (1, u8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svusdot_za32_vg1x2 (1, u8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+ svusdot_za32_vg1x2 (1, u8x2, s16); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16_t' respectively} } */
+ svusdot_za32_vg1x2 (1, u8x2, s16x2); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16x2_t' respectively} } */
+ svusdot_za32_vg1x2 (1, u8x2, s8);
+ svusdot_za32_vg1x2 (1, u8x2, s8x2);
+ svusdot_za32_vg1x2 (1, u8x2, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_za32_vg1x2 (1, u8x2, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+ svusdot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svusdot_za32_vg1x2', which expects vectors of signed integers} } */
+ svusdot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+ svusdot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+
+ svusdot_za32_vg1x2 (1, u16x2, s16); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+ svusdot_za32_vg1x2 (1, u16x2, s16x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+ svusdot_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+ svusdot_za32_vg1x2 (1, u64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+ svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+ svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint16x3_t s16x3, svuint16x3_t u16x3,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za32_vg2x1 (0, s16, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x1'} } */
+ svmla_lane_za32_vg2x1 (0, s16, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x1'} } */
+
+ svmla_lane_za32_vg2x1 (s16, s16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+ svmla_lane_za32_vg2x1 (f, s16, s16, 0);
+ svmla_lane_za32_vg2x1 (d, s16, s16, 0);
+ svmla_lane_za32_vg2x1 (pg, s16, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+
+ svmla_lane_za32_vg2x1 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x1 (0, pg, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svbool_t'} } */
+ svmla_lane_za32_vg2x1 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+ svmla_lane_za32_vg2x1 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+
+ svmla_lane_za32_vg2x1 (0, s16, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x1 (0, s16, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+ svmla_lane_za32_vg2x1 (0, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+ svmla_lane_za32_vg2x1 (0, s16, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+ svmla_lane_za32_vg2x1 (0, s16, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+ svmla_lane_za32_vg2x1 (0, u16, u16, 0);
+ svmla_lane_za32_vg2x1 (0, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svuint16_t'} } */
+ svmla_lane_za32_vg2x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svint32_t' arguments} } */
+ svmla_lane_za32_vg2x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svuint32_t' arguments} } */
+
+ svmla_lane_za32_vg2x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x1 (0, s16, s16, 7);
+ svmla_lane_za32_vg2x1 (0, s16, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x1 (0, s16, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x1' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+ svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+ svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32,
+ svint64_t s64, svuint64_t u64)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za64_vg4x1 (0, s16, s16, 0);
+ svmla_lane_za64_vg4x1 (0, u16, u16, 0);
+ svmla_lane_za64_vg4x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x1 (0, s16, s16, 7);
+ svmla_lane_za64_vg4x1 (0, u16, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint32_t' arguments} } */
+ svmla_lane_za64_vg4x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint32_t' arguments} } */
+ svmla_lane_za64_vg4x1 (0, s64, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint64_t' arguments} } */
+ svmla_lane_za64_vg4x1 (0, u64, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint64_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint16x3_t s16x3, svuint16x3_t u16x3,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za32_vg2x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x2'} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x2'} } */
+
+ svmla_lane_za32_vg2x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+ svmla_lane_za32_vg2x2 (f, s16x2, s16, 0);
+ svmla_lane_za32_vg2x2 (d, s16x2, s16, 0);
+ svmla_lane_za32_vg2x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+
+ svmla_lane_za32_vg2x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+ svmla_lane_za32_vg2x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+ svmla_lane_za32_vg2x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+ svmla_lane_za32_vg2x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+
+ svmla_lane_za32_vg2x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x2', which expects a single SVE vector rather than a tuple} } */
+ svmla_lane_za32_vg2x2 (0, u16x2, u16, 0);
+ svmla_lane_za32_vg2x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svuint16_t'} } */
+ svmla_lane_za32_vg2x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svint32x2_t' arguments} } */
+ svmla_lane_za32_vg2x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, 7);
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+ svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint32_t s32, svuint32_t u32,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64_t s64, svuint64_t u64,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za64_vg4x2 (0, s16x2, s16, 0);
+ svmla_lane_za64_vg4x2 (0, u16x2, u16, 0);
+ svmla_lane_za64_vg4x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x2 (0, s16x2, s16, 7);
+ svmla_lane_za64_vg4x2 (0, u16x2, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint32x2_t' arguments} } */
+ svmla_lane_za64_vg4x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svmla_lane_za64_vg4x2 (0, s64x2, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint64x2_t' arguments} } */
+ svmla_lane_za64_vg4x2 (0, u64x2, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint16x3_t s16x3, svuint16x3_t u16x3,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za32_vg2x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x4'} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x4'} } */
+
+ svmla_lane_za32_vg2x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+ svmla_lane_za32_vg2x4 (f, s16x4, s16, 0);
+ svmla_lane_za32_vg2x4 (d, s16x4, s16, 0);
+ svmla_lane_za32_vg2x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+
+ svmla_lane_za32_vg2x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+ svmla_lane_za32_vg2x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+ svmla_lane_za32_vg2x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+ svmla_lane_za32_vg2x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+
+ svmla_lane_za32_vg2x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svmla_lane_za32_vg2x4', which expects a single SVE vector rather than a tuple} } */
+ svmla_lane_za32_vg2x4 (0, u16x4, u16, 0);
+ svmla_lane_za32_vg2x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svuint16_t'} } */
+ svmla_lane_za32_vg2x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svint32x4_t' arguments} } */
+ svmla_lane_za32_vg2x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svuint32x4_t' arguments} } */
+
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, 7);
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x4' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming
+{
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za")
+{
+ svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32_t s32, svuint32_t u32,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ svint64_t s64, svuint64_t u64,
+ svint64x4_t s64x4, svuint64x4_t u64x4)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za64_vg4x4 (0, s16x4, s16, 0);
+ svmla_lane_za64_vg4x4 (0, u16x4, u16, 0);
+ svmla_lane_za64_vg4x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x4 (0, s16x4, s16, 7);
+ svmla_lane_za64_vg4x4 (0, u16x4, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+ svmla_lane_za64_vg4x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint32x4_t' arguments} } */
+ svmla_lane_za64_vg4x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint32x4_t' arguments} } */
+ svmla_lane_za64_vg4x4 (0, s64x4, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint64x4_t' arguments} } */
+ svmla_lane_za64_vg4x4 (0, u64x4, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint64x4_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ svint32x2_t s32x2, svuint32x2_t u32x2, svfloat32x2_t f32x2, int i)
+ __arm_streaming __arm_inout("za")
+{
+ svmla_lane_za32_vg4x1 (0, s8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+ svmla_lane_za32_vg4x1 (0, u8, u8, 0);
+ svmla_lane_za32_vg4x1 (0, s8, s8, 15);
+ svmla_lane_za32_vg4x1 (0, u8, u8, 16); /* { dg-error {passing 16 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+ svmla_lane_za32_vg4x1 (0, s16, s16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svint16_t' arguments} } */
+ svmla_lane_za32_vg4x1 (0, u16, u16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svuint16_t' arguments} } */
+
+ svmla_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svmla_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svmla_lane_za32_vg1x2 (0, f32x2, f32, 0);
+ svmla_lane_za32_vg1x2 (0, f32x2, f32, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svmla_lane_za32_vg1x2 (0, f32x2, f32, 4); /* { dg-error {passing 4 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svmla_lane_za32_vg1x2 (0, f32x2, f32, i); /* { dg-error {argument 4 of 'svmla_lane_za32_vg1x2' must be an integer constant expression} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+ svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint32x3_t s32x3, svuint32x3_t u32x3,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ svint64x2_t s64x2, svuint64x2_t u64x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_write_za32_vg1x2 (1, s32x2); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x2'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x2'} } */
+
+ svadd_write_za32_vg1x2 (s32x2, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+ svadd_write_za32_vg1x2 (f, s32x2, s32x2);
+ svadd_write_za32_vg1x2 (d, s32x2, s32x2);
+ svadd_write_za32_vg1x2 (pg, s32x2, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svadd_write_za32_vg1x2 (1, 1, s32x2); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svadd_write_za32_vg1x2 (1, pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32, s32x2); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svadd_write_za32_vg1x2 (1, s32x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svadd_write_za32_vg1x2 (1, s32x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, s32);
+ svadd_write_za32_vg1x2 (1, s32x2, s32x2);
+ svadd_write_za32_vg1x2 (1, s32x2, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32x2, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+ svadd_write_za32_vg1x2 (1, s32x2, u32x3); /* { dg-error {passing 'svuint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, s32x2, u32x4); /* { dg-error {passing 'svuint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_write_za32_vg1x2 (1, u32x2, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svuint32_t'} } */
+ svadd_write_za32_vg1x2 (1, u32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svuint32x2_t'} } */
+ svadd_write_za32_vg1x2 (1, u32x2, u32);
+ svadd_write_za32_vg1x2 (1, u32x2, u32x2);
+
+ svadd_write_za32_vg1x2 (1, s16x2, s16); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+ svadd_write_za32_vg1x2 (1, s16x2, s16x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+ svadd_write_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+ svadd_write_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint32x2_t s32x2) __arm_streaming
+{
+ svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint32x2_t s32x2) __arm_inout("za")
+{
+ svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svadd_write_za64_vg1x2 (1, s64x2, s64x2);
+ svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+ svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint32x3_t s32x3, svuint32x3_t u32x3,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ svint64x2_t s64x2, svuint64x2_t u64x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_write_za32_vg1x4 (1, s32x4); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x4'} } */
+ svadd_write_za32_vg1x4 (1, s32x4, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x4'} } */
+
+ svadd_write_za32_vg1x4 (s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+ svadd_write_za32_vg1x4 (f, s32x4, s32x4);
+ svadd_write_za32_vg1x4 (d, s32x4, s32x4);
+ svadd_write_za32_vg1x4 (pg, s32x4, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+
+ svadd_write_za32_vg1x4 (1, 1, s32x4); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+ svadd_write_za32_vg1x4 (1, pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_write_za32_vg1x4 (1, s32, s32x4); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_write_za32_vg1x4 (1, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_write_za32_vg1x4 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2+nosme-i16i64")
+
+void
+f1 (svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svadd_write_za64_vg1x2 (1, s64x2, s64x2); /* { dg-error {ACLE function 'svadd_write_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */
+ svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svuint16_t u16, svint8_t s8, svuint8_t u8,
+ svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint8x3_t s8x3, svuint8x3_t u8x3,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint64x2_t s64x2, svuint64x2_t u64x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svsudot_za32_vg1x2 (1, s8x2); /* { dg-error {too few arguments to function 'svsudot_za32_vg1x2'} } */
+ svsudot_za32_vg1x2 (1, s8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsudot_za32_vg1x2'} } */
+
+ svsudot_za32_vg1x2 (s8x2, s8x2, u8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+ svsudot_za32_vg1x2 (f, s8x2, u8x2);
+ svsudot_za32_vg1x2 (d, s8x2, u8x2);
+ svsudot_za32_vg1x2 (pg, s8x2, u8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svsudot_za32_vg1x2 (1, 1, u8x2); /* { dg-error {passing 'int' to argument 2 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svsudot_za32_vg1x2 (1, pg, u8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_za32_vg1x2 (1, s8, u8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_za32_vg1x2 (1, s8x3, u8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_za32_vg1x2 (1, s8x4, u8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svsudot_za32_vg1x2 (1, s8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svsudot_za32_vg1x2 (1, s8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+ svsudot_za32_vg1x2 (1, s8x2, u16); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16_t' respectively} } */
+ svsudot_za32_vg1x2 (1, s8x2, u16x2); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16x2_t' respectively} } */
+ svsudot_za32_vg1x2 (1, s8x2, u8);
+ svsudot_za32_vg1x2 (1, s8x2, u8x2);
+ svsudot_za32_vg1x2 (1, s8x2, u8x3); /* { dg-error {passing 'svuint8x3_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_za32_vg1x2 (1, s8x2, u8x4); /* { dg-error {passing 'svuint8x4_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+ svsudot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsudot_za32_vg1x2', which expects vectors of unsigned integers} } */
+ svsudot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+ svsudot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+
+ svsudot_za32_vg1x2 (1, s16x2, u16); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+ svsudot_za32_vg1x2 (1, s16x2, u16x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+ svsudot_za32_vg1x2 (1, s64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+ svsudot_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+ svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+ svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+ svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4)
+{
+ svsel (pg, u8); /* { dg-error {too few arguments to function 'svsel'} } */
+ svsel (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svsel'} } */
+ svsel (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+ svsel (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+ svsel (pn, u8, u8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */
+ svsel (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsel', but argument 2 had type 'svbool_t'} } */
+ svsel (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+ svsel (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+ svsel (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+ svsel (pg, pg, pg);
+ svsel (pg, u8, u8);
+ svsel (pg, u8, u8x2); /* { dg-error {passing tuple 'svuint8x2_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+ svsel (pg, u8, u8x3); /* { dg-error {passing tuple 'svuint8x3_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+ svsel (pg, u8, u8x4); /* { dg-error {passing tuple 'svuint8x4_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+ svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+ svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4,
+ svuint16x2_t u16x2) __arm_streaming
+{
+ svsel (pn, u8x2); /* { dg-error {too few arguments to function 'svsel'} } */
+ svsel (pn, u8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsel'} } */
+ svsel (0, u8x2, u8x2); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+ svsel (u8x2, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+ svsel (pg, u8x2, u8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */
+ svsel (pn, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+ svsel (pn, u8x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+ svsel (pn, u8x2, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+ svsel (pn, u8x2, u8); /* { dg-error {passing single vector 'svuint8_t' to argument 3 of 'svsel' after passing tuple 'svuint8x2_t' to argument 2} } */
+ svsel (pn, u8x2, u8x2);
+ svsel (pn, u8x2, u8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+ svsel (pn, u8x2, s8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+ svsel (pn, u8x2, u8x4); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x4_t' to arguments 2 and 3 of 'svsel'} } */
+ svsel (pn, s8x4, s8x2); /* { dg-error {passing mismatched tuple types 'svint8x4_t' and 'svint8x2_t' to arguments 2 and 3 of 'svsel'} } */
+}
+
+void
+f2 (svcount_t pn, svuint8x2_t u8x2)
+{
+ svsel (pn, u8x2, u8x2); /* { dg-error {ACLE function 'svsel_u8_x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svcount_t pn, svfloat16_t f16, svint16_t s16, svfloat32_t f32,
+ svfloat16x2_t f16x2, svfloat16x3_t f16x3, svfloat16x4_t f16x4)
+ __arm_streaming
+{
+ svclamp (f16, f16); /* { dg-error {too few arguments to function 'svclamp'} } */
+ svclamp (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svclamp'} } */
+ svclamp (0, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svclamp', which expects an SVE type rather than a scalar type} } */
+ svclamp (f16, f16, f16);
+ svclamp (s16, s16, s16); /* { dg-error {'svclamp' has no form that takes 'svint16_t' arguments} } */
+ svclamp (pn, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svclamp', but argument 1 had type 'svcount_t'} } */
+ svclamp (f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+ svclamp (f16, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+ svclamp (f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+ svclamp (f16, f16, 0); /* { dg-error {passing 'int' to argument 3 of 'svclamp', which expects an SVE type rather than a scalar} } */
+ svclamp (f16, f16x2, f16); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+ svclamp (f16, f16x4, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+ svclamp (f16, f16, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+ svclamp (f16, f16, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+
+ svclamp (f16x2, f16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+ svclamp (f16x2, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+ svclamp (f16x2, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+#include <stdbool.h>
+
+#pragma GCC target "+sme2"
+
+enum signed_enum { SA = -1, SB };
+enum unsigned_enum { UA, UB };
+
+void
+test (int32_t s32, int64_t s64, uint16_t u16, uint32_t u32, uint64_t u64,
+ bool b, int *ptr, float f32, svbool_t pg, svint32_t vec)
+ __arm_streaming
+{
+ svwhilele_c8 (s64, 2); /* { dg-error {too few arguments to function 'svwhilele_c8'} } */
+ svwhilele_c8 (s64, s64, 2, 2); /* { dg-error {too many arguments to function 'svwhilele_c8'} } */
+
+ svwhilele_c8 (b, b, 2); /* { dg-error {passing '_Bool' and '_Bool' to arguments 1 and 2 of 'svwhilele_c8', which expects a pair of 64-bit integers} } */
+ svwhilele_c8 (u16, u16, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (ptr, ptr, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (f32, f32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (pg, pg, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (vec, vec, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (0, 0, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+ svwhilele_c8 (s32, s32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+
+ svwhilele_c8 (0, s64, 2);
+ svwhilele_c8 (0U, s64, 2);
+ svwhilele_c8 (0, u64, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (0U, u64, 2);
+
+ svwhilele_c8 (s32, s64, 2);
+ svwhilele_c8 (u32, s64, 2);
+ svwhilele_c8 (s32, u64, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (u32, u64, 2);
+
+ svwhilele_c8 (s64, s64, 2);
+ svwhilele_c8 (u64, s64, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (s64, u64, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (u64, u64, 2);
+
+ svwhilele_c8 (s64, 0, 2);
+ svwhilele_c8 (s64, 0U, 2);
+ svwhilele_c8 (u64, 0, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (u64, 0U, 2);
+
+ svwhilele_c8 (s64, s32, 2);
+ svwhilele_c8 (s64, u32, 2);
+ svwhilele_c8 (u64, s32, 2); /* { dg-error {mismatched integer types} } */
+ svwhilele_c8 (u64, u32, 2);
+
+ svwhilele_c8 (u64, u64, u64); /* { dg-error {argument 3 of 'svwhilele_c8' must be an integer constant expression} } */
+ svwhilele_c8 (u64, u64, 1); /* { dg-error {passing 1 to argument 3 of 'svwhilele_c8', which expects either 2 or 4} } */
+}
*ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */
*ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */
*ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */
- *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */
+ *ptr = svcreate2 (pg, pg); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svboolx2_t'} } */
*ptr = svcreate2 (u8, u8);
*ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */
}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32,
+ svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint8x3_t s8x3, svuint8x3_t u8x3,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8); /* { dg-error {too few arguments to function 'svusdot_lane_za32_vg1x2'} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane_za32_vg1x2'} } */
+
+ svusdot_lane_za32_vg1x2 (u8x2, u8x2, s8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+ svusdot_lane_za32_vg1x2 (f, u8x2, s8, 0);
+ svusdot_lane_za32_vg1x2 (d, u8x2, s8, 0);
+ svusdot_lane_za32_vg1x2 (pg, u8x2, s8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svusdot_lane_za32_vg1x2 (0, 1, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svusdot_lane_za32_vg1x2 (0, pg, s8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_lane_za32_vg1x2 (0, u8, s8, 0); /* { dg-error {passing single vector 'svuint8_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_lane_za32_vg1x2 (0, u8x3, s8, 0); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svusdot_lane_za32_vg1x2 (0, u8x4, s8, 0); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svusdot_lane_za32_vg1x2 (0, u8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s32, 0); /* { dg-error {arguments 2 and 3 of 'svusdot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint32_t' respectively} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8x2, 0); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0);
+ svusdot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+ svusdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 3);
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, f); /* { dg-error {argument 4 of 'svusdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svuint8x2_t u8x2, svint8_t s8) __arm_streaming
+{
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svuint8x2_t u8x2, svint8_t s8) __arm_inout("za")
+{
+ svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32,
+ svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint16x3_t s16x3, svuint16x3_t u16x3,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svdot_lane_za32_vg1x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x2'} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x2'} } */
+
+ svdot_lane_za32_vg1x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+ svdot_lane_za32_vg1x2 (f, s16x2, s16, 0);
+ svdot_lane_za32_vg1x2 (d, s16x2, s16, 0);
+ svdot_lane_za32_vg1x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svdot_lane_za32_vg1x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svdot_lane_za32_vg1x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svdot_lane_za32_vg1x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svdot_lane_za32_vg1x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svdot_lane_za32_vg1x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svdot_lane_za32_vg1x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+ svdot_lane_za32_vg1x2 (0, u16x2, u16, 0);
+ svdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svuint16_t'} } */
+ svdot_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svdot_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+ svdot_lane_za32_vg1x2 (0, s8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x2 (0, s8x2, s8, 3);
+ svdot_lane_za32_vg1x2 (0, s8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, 3);
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+ svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint32_t s32, svuint32_t u32,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64_t s64, svuint64_t u64,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svdot_lane_za64_vg1x2 (0, s16x2, s16, 0);
+ svdot_lane_za64_vg1x2 (0, u16x2, u16, 0);
+ svdot_lane_za64_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+ svdot_lane_za64_vg1x2 (0, s16x2, s16, 1);
+ svdot_lane_za64_vg1x2 (0, u16x2, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+ svdot_lane_za64_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svdot_lane_za64_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svdot_lane_za64_vg1x2 (0, s64x2, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+ svdot_lane_za64_vg1x2 (0, u64x2, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint16x3_t s16x3, svuint16x3_t u16x3,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svdot_lane_za32_vg1x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x4'} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x4'} } */
+
+ svdot_lane_za32_vg1x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+ svdot_lane_za32_vg1x4 (f, s16x4, s16, 0);
+ svdot_lane_za32_vg1x4 (d, s16x4, s16, 0);
+ svdot_lane_za32_vg1x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+
+ svdot_lane_za32_vg1x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+ svdot_lane_za32_vg1x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svdot_lane_za32_vg1x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svdot_lane_za32_vg1x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svdot_lane_za32_vg1x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+
+ svdot_lane_za32_vg1x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svdot_lane_za32_vg1x4', which expects a single SVE vector rather than a tuple} } */
+ svdot_lane_za32_vg1x4 (0, u16x4, u16, 0);
+ svdot_lane_za32_vg1x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svuint16_t'} } */
+ svdot_lane_za32_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svint32x4_t' arguments} } */
+ svdot_lane_za32_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */
+
+ svdot_lane_za32_vg1x4 (0, s8x4, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x4 (0, s8x4, s8, 3);
+ svdot_lane_za32_vg1x4 (0, s8x4, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, 3);
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x4' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming
+{
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za")
+{
+ svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32_t s32, svuint32_t u32,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ svint64_t s64, svuint64_t u64,
+ svint64x4_t s64x4, svuint64x4_t u64x4)
+ __arm_streaming __arm_inout("za")
+{
+ svdot_lane_za64_vg1x4 (0, s16x4, s16, 0);
+ svdot_lane_za64_vg1x4 (0, u16x4, u16, 0);
+ svdot_lane_za64_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */
+ svdot_lane_za64_vg1x4 (0, s16x4, s16, 1);
+ svdot_lane_za64_vg1x4 (0, u16x4, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */
+ svdot_lane_za64_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svint32x4_t' arguments} } */
+ svdot_lane_za64_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */
+ svdot_lane_za64_vg1x4 (0, s64x4, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svint64x4_t' arguments} } */
+ svdot_lane_za64_vg1x4 (0, u64x4, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint64x4_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32,
+ svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint8x3_t s8x3, svuint8x3_t u8x3,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8); /* { dg-error {too few arguments to function 'svsudot_lane_za32_vg1x2'} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane_za32_vg1x2'} } */
+
+ svsudot_lane_za32_vg1x2 (u8x2, s8x2, u8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+ svsudot_lane_za32_vg1x2 (f, s8x2, u8, 0);
+ svsudot_lane_za32_vg1x2 (d, s8x2, u8, 0);
+ svsudot_lane_za32_vg1x2 (pg, s8x2, u8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svsudot_lane_za32_vg1x2 (0, 1, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svsudot_lane_za32_vg1x2 (0, pg, u8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_lane_za32_vg1x2 (0, s8, u8, 0); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_lane_za32_vg1x2 (0, s8x3, u8, 0); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svsudot_lane_za32_vg1x2 (0, s8x4, u8, 0); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svsudot_lane_za32_vg1x2 (0, s8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u32, 0); /* { dg-error {arguments 2 and 3 of 'svsudot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint32_t' respectively} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8x2, 0); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0);
+ svsudot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+ svsudot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 3);
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, f); /* { dg-error {argument 4 of 'svsudot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8_t u8) __arm_streaming
+{
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8_t u8) __arm_inout("za")
+{
+ svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svboolx2_t pgx2,
+ svint8x2_t s8x2, svuint8x2_t u8x2,
+ svint8x4_t s8x4, svuint8x4_t u8x4,
+ svint16x2_t s16x2, svuint16x2_t u16x2,
+ svint16x4_t s16x4, svuint16x4_t u16x4,
+ svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint32x4_t s32x4, svuint32x4_t u32x4,
+ svint64x2_t s64x2, svuint64x2_t u64x2,
+ svint64x4_t s64x4, svuint64x4_t u64x4,
+ svfloat32x2_t f32x2, int x) __arm_streaming
+{
+ const int one = 1;
+ svqrshr_u8 (u32x4); /* { dg-error {too few arguments to function 'svqrshr_u8'} } */
+ svqrshr_u8 (u32x4, 1, 1); /* { dg-error {too many arguments to function 'svqrshr_u8'} } */
+
+ svqrshr_u8 (u32x4, x); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */
+ svqrshr_u8 (u32x4, one); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */
+ svqrshr_u8 (u32x4, 0.4); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+ svqrshr_u8 (u32x4, 1.0);
+
+ svqrshr_u8 (pgx2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svboolx2_t' arguments} } */
+ svqrshr_u8 (u8x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x2_t' arguments} } */
+ svqrshr_u8 (u8x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x4_t' arguments} } */
+ svqrshr_u8 (u16x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x2_t' arguments} } */
+ svqrshr_u8 (u16x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x4_t' arguments} } */
+ svqrshr_u8 (u32x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint32x2_t' arguments} } */
+ svqrshr_u8 (u32x4, 1);
+ svqrshr_u8 (u64x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x2_t' arguments} } */
+ svqrshr_u8 (u64x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x4_t' arguments} } */
+ svqrshr_u8 (s32x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svint32x4_t' arguments} } */
+
+ svqrshr_s8 (s8x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x2_t' arguments} } */
+ svqrshr_s8 (s8x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x4_t' arguments} } */
+ svqrshr_s8 (s16x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x2_t' arguments} } */
+ svqrshr_s8 (s16x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x4_t' arguments} } */
+ svqrshr_s8 (s32x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint32x2_t' arguments} } */
+ svqrshr_s8 (s32x4, 1);
+ svqrshr_s8 (s64x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x2_t' arguments} } */
+ svqrshr_s8 (s64x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x4_t' arguments} } */
+ svqrshr_s8 (u32x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svuint32x4_t' arguments} } */
+
+ svqrshr_u16 (pgx2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svboolx2_t' arguments} } */
+ svqrshr_u16 (u8x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x2_t' arguments} } */
+ svqrshr_u16 (u8x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x4_t' arguments} } */
+ svqrshr_u16 (u16x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x2_t' arguments} } */
+ svqrshr_u16 (u16x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x4_t' arguments} } */
+ svqrshr_u16 (u32x2, 1);
+ svqrshr_u16 (u32x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint32x4_t' arguments} } */
+ svqrshr_u16 (u64x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint64x2_t' arguments} } */
+ svqrshr_u16 (u64x4, 1);
+ svqrshr_u16 (s32x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svint32x2_t' arguments} } */
+
+ svqrshr_s16 (s8x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x2_t' arguments} } */
+ svqrshr_s16 (s8x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x4_t' arguments} } */
+ svqrshr_s16 (s16x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x2_t' arguments} } */
+ svqrshr_s16 (s16x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x4_t' arguments} } */
+ svqrshr_s16 (s32x2, 1);
+ svqrshr_s16 (s32x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint32x4_t' arguments} } */
+ svqrshr_s16 (s64x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint64x2_t' arguments} } */
+ svqrshr_s16 (s64x4, 1);
+ svqrshr_s16 (u32x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svuint32x2_t' arguments} } */
+
+ svqrshr_u8 (u32x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+ svqrshr_u8 (u32x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+ svqrshr_u8 (u32x4, 1);
+ svqrshr_u8 (u32x4, 32);
+ svqrshr_u8 (u32x4, 33); /* { dg-error {passing 33 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+
+ svqrshr_u16 (u32x2, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+ svqrshr_u16 (u32x2, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+ svqrshr_u16 (u32x2, 1);
+ svqrshr_u16 (u32x2, 16);
+ svqrshr_u16 (u32x2, 17); /* { dg-error {passing 17 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+
+ svqrshr_u16 (u64x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+ svqrshr_u16 (u64x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+ svqrshr_u16 (u64x4, 1);
+ svqrshr_u16 (u64x4, 64);
+ svqrshr_u16 (u64x4, 65); /* { dg-error {passing 65 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+
+ svqrshr_u8 (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqrshr_u8', which expects an SVE type rather than a scalar} } */
+}
{
svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */
svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */
- svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */
+ svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */
svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */
svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */
svst1 (pg, 0, s8);
{
svst1_vnum (pg, s8_ptr, 0); /* { dg-error {too few arguments to function 'svst1_vnum'} } */
svst1_vnum (pg, s8_ptr, 0, s8, 0); /* { dg-error {too many arguments to function 'svst1_vnum'} } */
- svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects 'svbool_t'} } */
+ svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects an 'svbool_t' or 'svcount_t'} } */
svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-error "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+struct s { signed char x; };
+
+svuint8_t
+f1 (svbool_t pg, svcount_t pn, svboolx2_t pgx2,
+ signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
+ float *f32_ptr, _Complex float *cf32_ptr,
+ svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3,
+ svfloat32x4_t f32x4, struct s s) __arm_streaming
+{
+ svst1 (pn, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */
+ svst1 (pn, s8_ptr, s8x2, 0); /* { dg-error {too many arguments to function 'svst1'} } */
+ svst1 (0, s8_ptr, s8x2); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */
+ svst1 (pn, void_ptr, 0x2); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */
+ svst1 (pn, void_ptr, pgx2); /* { dg-error {'svst1' has no form that takes 'svboolx2_t' arguments} } */
+ svst1 (pn, 0, s8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */
+ svst1 (pn, 0, s8x2);
+ svst1 (pg, 0, s8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */
+ svst1 (pn, 0, s8x3); /* { dg-error {'svst1' has no form that takes 'svint8x3_t' arguments} } */
+ svst1 (pn, (int32_t *) 0, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+ svst1 (pn, void_ptr, s8x2);
+ svst1 (pn, s_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+ svst1 (pn, f32_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+ svst1 (pn, f32_ptr, f32x4);
+ svst1 (pn, cf32_ptr, f32x4); /* { dg-error "passing argument 2 of 'svst1_f32_x4' from incompatible pointer type" } */
+ svst1 (pn, s, s8x2); /* { dg-error {passing 'struct s' to argument 2 of 'svst1', which expects a scalar pointer} } */
+}
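For reference, a minimal sketch (illustrative only, not taken from the patch) of the valid tuple-store pattern that the new test above accepts: under "+sme2" the overloaded svst1 form for vector tuples is predicated by an svcount_t rather than an svbool_t and must be called in streaming mode.

#include <arm_sve.h>

#pragma GCC target "+sme2"

void
store_pair (svcount_t pn, int8_t *ptr, svint8x2_t data) __arm_streaming
{
  /* Multi-vector store: both vectors of DATA are stored, predicated by PN.  */
  svst1 (pn, ptr, data);
}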
svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */
svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */
svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
- svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */
- svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */
- svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' and 'svuint8_t' arguments} } */
+ svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */
+ svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */
svdot_lane (u32, u8, u8, 0);
svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
svdot_lane (s32, s8, s8, 0);
- svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
- svdot_lane (s32, s8, u8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
- svdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svdot_lane', after passing 'svint32_t' to argument 1} } */
+ svdot_lane (s32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+ svdot_lane (s32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */
+ svdot_lane (s32, s32, s32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svint32_t' and 'svint32_t' arguments} } */
svdot_lane (u32, u8, u8, 0);
- svdot_lane (u32, s8, u8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
- svdot_lane (u32, u8, s8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
- svdot_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot_lane', after passing 'svuint32_t' to argument 1} } */
+ svdot_lane (u32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */
+ svdot_lane (u32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+ svdot_lane (u32, u32, u32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */
svdot_lane (s64, s16, s16, 0);
- svdot_lane (s64, u16, s16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */
- svdot_lane (s64, s16, u16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */
- svdot_lane (s64, s64, s64, 0); /* { dg-error {passing 'svint64_t' instead of the expected 'svint16_t' to argument 2 of 'svdot_lane', after passing 'svint64_t' to argument 1} } */
+ svdot_lane (s64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+ svdot_lane (s64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */
+ svdot_lane (s64, s64, s64, 0); /* { dg-error {'svdot_lane' has no form that takes 'svint64_t' and 'svint64_t' arguments} } */
svdot_lane (u64, u16, u16, 0);
- svdot_lane (u64, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */
- svdot_lane (u64, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */
- svdot_lane (u64, u64, u64, 0); /* { dg-error {passing 'svuint64_t' instead of the expected 'svuint16_t' to argument 2 of 'svdot_lane', after passing 'svuint64_t' to argument 1} } */
+ svdot_lane (u64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */
+ svdot_lane (u64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+ svdot_lane (u64, u64, u64, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint64_t' and 'svuint64_t' arguments} } */
svdot_lane (s32, s8, s8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
svdot_lane (s32, s8, s8, 0);
svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */
svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */
svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE type rather than a scalar} } */
- svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */
- svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */
- svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */
+ svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' and 'svuint8_t' arguments} } */
+ svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */
+ svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */
svdot (u32, u8, u8);
svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE type rather than a scalar} } */
- svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+ svdot (u32, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot', but argument 2 had type 'svint8_t'} } */
svdot (u32, u8, 0);
- svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
- svdot (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot', after passing 'svuint32_t' to argument 1} } */
+ svdot (u32, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot', but argument 2 had type 'svuint8_t'} } */
+ svdot (u32, u32, u32); /* { dg-error {'svdot' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */
}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64,
+ svfloat16_t f16, svfloat32_t f32, int i) __arm_streaming
+{
+ svdot_lane (u32, u16, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+ svdot_lane (u32, u8, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+ svdot_lane (u32, s16, s16, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svint16_t' arguments} } */
+
+ svdot_lane (u32, u16, u16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+ svdot_lane (u32, u16, u16, 0);
+ svdot_lane (u32, u16, u16, 3);
+ svdot_lane (u32, u16, u16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+ svdot_lane (u32, u16, u16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+
+ svdot_lane (s32, s16, s16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+ svdot_lane (s32, s16, s16, 0);
+ svdot_lane (s32, s16, s16, 3);
+ svdot_lane (s32, s16, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+ svdot_lane (s32, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+
+ svdot_lane (f32, f16, f16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+ svdot_lane (f32, f16, f16, 0);
+ svdot_lane (f32, f16, f16, 3);
+ svdot_lane (f32, f16, f16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+ svdot_lane (f32, f16, f16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+}
--- /dev/null
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+test (svbool_t pg, float f, svint8_t s8, svfloat32_t f32,
+ svint32x2_t s32x2, svint32x3_t s32x3, svint32x4_t s32x4,
+ svfloat32x2_t f32x2, svfloat32x3_t f32x3, svfloat32x4_t f32x4)
+ __arm_streaming
+{
+ svcvt_bf16 (); /* { dg-error {too few arguments to function 'svcvt_bf16'} } */
+ svcvt_bf16 (f32x2, f32x2); /* { dg-error {too many arguments to function 'svcvt_bf16'} } */
+ svcvt_bf16 (0); /* { dg-error {passing 'int' to argument 1 of 'svcvt_bf16', which expects an SVE type rather than a scalar} } */
+ svcvt_bf16 (f); /* { dg-error {passing 'float' to argument 1 of 'svcvt_bf16', which expects an SVE type rather than a scalar} } */
+ svcvt_bf16 (pg); /* { dg-error {'svcvt_bf16' has no form that takes 'svbool_t' arguments} } */
+ svcvt_bf16 (s8); /* { dg-error {'svcvt_bf16' has no form that takes 'svint8_t' arguments} } */
+ svcvt_bf16 (f32); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32_t' arguments} } */
+ svcvt_bf16 (f32x2);
+ svcvt_bf16 (f32x3); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x3_t' arguments} } */
+ svcvt_bf16 (f32x4); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x4_t' arguments} } */
+ svcvt_bf16 (s32x2); /* { dg-error {'svcvt_bf16' has no form that takes 'svint32x2_t' arguments} } */
+ svcvt_s32 (f32x2);
+ svcvt_s32 (f32x3); /* { dg-error {'svcvt_s32' has no form that takes 'svfloat32x3_t' arguments} } */
+ svcvt_s32 (f32x4);
+ svcvt_f32 (s32x2);
+ svcvt_f32 (s32x3); /* { dg-error {'svcvt_f32' has no form that takes 'svint32x3_t' arguments} } */
+ svcvt_f32 (s32x4);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint32_t s32, svint16x2_t s16x2, svint32x2_t s32x2,
+ svint32x3_t s32x3, svint32x4_t s32x4, svint64x2_t s64x2, float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_za32_vg1x2 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x2'} } */
+ svadd_za32_vg1x2 (1, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_za32_vg1x2'} } */
+
+ svadd_za32_vg1x2 (s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */
+ svadd_za32_vg1x2 (f, s32x2);
+ svadd_za32_vg1x2 (d, s32x2);
+ svadd_za32_vg1x2 (pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */
+
+ svadd_za32_vg1x2 (1, 1); /* { dg-error {passing 'int' to argument 2 of 'svadd_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+ svadd_za32_vg1x2 (1, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_za32_vg1x2 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_za32_vg1x2 (1, s32x2);
+ svadd_za32_vg1x2 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+ svadd_za32_vg1x2 (1, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svadd_za32_vg1x2 (1, s16x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+ svadd_za32_vg1x2 (1, s64x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+}
+
+void
+f2 (svint32x2_t s32x2) __arm_streaming
+{
+ svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint32x2_t s32x2) __arm_inout("za")
+{
+ svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svadd_za64_vg1x2 (1, s64x2);
+ svadd_za64_vg1x2 (1, u64x2);
+}
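For reference, a minimal sketch (illustrative only, not part of the patch) of the kind of caller that the negative tests f2 and f3 above are contrasted against: the builtin is only accepted when the function is both streaming and shares ZA state.

#include <arm_sme.h>

#pragma GCC target "+sme2"

void
accumulate (uint32_t slice, svint32x2_t data) __arm_streaming __arm_inout("za")
{
  /* Accepted: streaming mode plus 'za' state, a tuple of 2 vectors,
     and 32-bit elements matching the _za32 form.  */
  svadd_za32_vg1x2 (slice, data);
}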
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv9-a+sme2")
+
+void
+f1 (svbool_t pg, svint32_t s32, svint16x4_t s16x4, svint32x2_t s32x2,
+ svint32x3_t s32x3, svint32x4_t s32x4, svint64x4_t s64x4, float f, double d)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_za32_vg1x4 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x4'} } */
+ svadd_za32_vg1x4 (1, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_za32_vg1x4'} } */
+
+ svadd_za32_vg1x4 (s32x2, s32x4); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */
+ svadd_za32_vg1x4 (f, s32x4);
+ svadd_za32_vg1x4 (d, s32x4);
+ svadd_za32_vg1x4 (pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */
+
+ svadd_za32_vg1x4 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_za32_vg1x4 (1, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_za32_vg1x4 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+ svadd_za32_vg1x4 (1, s32x4);
+
+ svadd_za32_vg1x4 (1, s16x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint16x4_t' arguments} } */
+ svadd_za32_vg1x4 (1, s64x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint64x4_t' arguments} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv9-a+sme2+nosme-i16i64")
+
+void
+f1 (svint32x2_t s32x2, svuint32x2_t u32x2,
+ svint64x2_t s64x2, svuint64x2_t u64x2)
+ __arm_streaming __arm_inout("za")
+{
+ svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+ svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svadd_za64_vg1x2 (1, s64x2); /* { dg-error {ACLE function 'svadd_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */
+ svadd_za64_vg1x2 (1, u64x2);
+}
--- /dev/null
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+test (svfloat32_t f32, svfloat32x2_t f32x2, svfloat32x3_t f32x3,
+ svfloat32x4_t f32x4) __arm_streaming
+{
+ svuzp (); /* { dg-error {too few arguments to function 'svuzp'} } */
+ svuzp (f32x2, f32x2); /* { dg-error {too many arguments to function 'svuzp'} } */
+ svuzp (f32); /* { dg-error {'svuzp' has no form that takes 'svfloat32_t' arguments} } */
+ svuzp (f32x2);
+ svuzp (f32x3); /* { dg-error {'svuzp' has no form that takes 'svfloat32x3_t' arguments} } */
+ svuzp (f32x4);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+ svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2,
+ uint32_t tile)
+ __arm_streaming __arm_inout("za")
+{
+ svwrite_ver_za8_vg2 (0, 0); /* { dg-error {too few arguments to function 'svwrite_ver_za8_vg2'} } */
+ svwrite_ver_za8_vg2 (0, 0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_vg2'} } */
+ svwrite_ver_za8_vg2 (tile, 0, s8x2); /* { dg-error {argument 1 of 'svwrite_ver_za8_vg2' must be an integer constant expression} } */
+ svwrite_ver_za8_vg2 (-1, 0, s8x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */
+ svwrite_ver_za8_vg2 (1, 0, s8x2); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */
+ svwrite_ver_za8_vg2 (0, u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_vg2', which expects 'uint32_t'} } */
+ svwrite_ver_za8_vg2 (0, 0, tile); /* { dg-error {passing 'uint32_t'.* to argument 3 of 'svwrite_ver_za8_vg2', which expects an SVE type} } */
+ svwrite_ver_za8_vg2 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+ svwrite_ver_za8_vg2 (0, 0, s8x2);
+ svwrite_ver_za8_vg2 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+ svwrite_ver_za8_vg2 (0, 0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+
+ svwrite_ver_za16_vg2 (-1, 0, u16x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */
+ svwrite_ver_za16_vg2 (2, 0, u16x2); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */
+
+ svwrite_ver_za32_vg2 (-1, 0, f32x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in the range \[0, 3\]} } */
+ svwrite_ver_za32_vg2 (4, 0, f32x2); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in the range \[0, 3\]} } */
+
+ svwrite_ver_za64_vg2 (-1, 0, s64x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */
+ svwrite_ver_za64_vg2 (8, 0, s64x2); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */
+
+ svwrite_ver_za8_vg4 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+ svwrite_ver_za8_vg4 (0, 0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+ svwrite_ver_za8_vg4 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+ svwrite_ver_za8_vg4 (0, 0, s8x4);
+}
+
+void
+f2 (svint8x2_t s8x2) __arm_streaming
+{
+ svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2) __arm_inout("za")
+{
+ svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called when SME streaming mode is enabled} } */
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+ svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2,
+ uint32_t tile)
+ __arm_streaming __arm_inout("za")
+{
+ svwrite_za8_vg1x2 (0); /* { dg-error {too few arguments to function 'svwrite_za8_vg1x2'} } */
+ svwrite_za8_vg1x2 (0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_za8_vg1x2'} } */
+ svwrite_za8_vg1x2 (u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svwrite_za8_vg1x2', which expects 'uint32_t'} } */
+ svwrite_za8_vg1x2 (0, tile); /* { dg-error {passing 'uint32_t'.* to argument 2 of 'svwrite_za8_vg1x2', which expects an SVE type} } */
+ svwrite_za8_vg1x2 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+ svwrite_za8_vg1x2 (0, s8x2);
+ svwrite_za8_vg1x2 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+ svwrite_za8_vg1x2 (0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+
+ svwrite_za8_vg1x4 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+ svwrite_za8_vg1x4 (0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+ svwrite_za8_vg1x4 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+ svwrite_za8_vg1x4 (0, s8x4);
+}
+
+void
+f2 (svint8x2_t s8x2) __arm_streaming
+{
+ svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2) __arm_inout("za")
+{
+ svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
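Likewise for the ZA writes tested above, a minimal valid-usage sketch (illustrative only, not from the patch), using the overloaded form that the test's accepted call exercises:

#include <arm_sme.h>

#pragma GCC target "+sme2"

void
write_pair (uint32_t slice, svint8x2_t data) __arm_streaming __arm_inout("za")
{
  /* Writes both vectors of DATA to the ZA vector group selected by SLICE.  */
  svwrite_za8_vg1x2 (slice, data);
}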