DEF_SVE_FUNCTION_GS (svscale, binary_int_opt_single_n, all_float, x24, none)
#undef REQUIRED_EXTENSIONS
+/* svmul for the all_float types in x24 vector groups, using the
+   binary_opt_single_n shape (the accompanying tests exercise both the
+   svmul_<type>_x2/_x4 and svmul_single_<type>_x2/_x4 spellings).  Only
+   registered under streaming_only (AARCH64_FL_SME2p2).  */
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2p2)
+DEF_SVE_FUNCTION_GS (svmul, binary_opt_single_n, all_float, x24, none)
+#undef REQUIRED_EXTENSIONS
+
#define REQUIRED_EXTENSIONS \
nonstreaming_sve (AARCH64_FL_SVE2 | AARCH64_FL_F8F16MM)
DEF_SVE_FUNCTION_GS_FPM (svmmla, mmla, h_float_mf8, none, none, set)
;; -------------------------------------------------------------------------
;; Includes the "multiple and single vector" and "multiple vectors" forms of
;; - BFMUL (SVE_BFSCALE)
+;; - FMUL (SME2p2)
;; -------------------------------------------------------------------------
;; BFMUL (multiple vectors)
"bfmul\t%0, %1, %2.h"
)
+;; FMUL (multiple vectors)
+;; Operands 0, 1 and 2 are all SVE_Fx24_NOBF register groups constrained to
+;; Uw<vector_count>, combined through the SVE_FP_MUL unspec.  The pattern is
+;; only enabled when TARGET_STREAMING_SME2p2 holds.  The operands are printed
+;; without an explicit element suffix in the output template.
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24_NOBF
+ [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "Uw<vector_count>")
+ (match_operand:SVE_Fx24_NOBF 2 "register_operand" "Uw<vector_count>")]
+ SVE_FP_MUL))]
+ "TARGET_STREAMING_SME2p2"
+ "fmul\t%0, %1, %2"
+)
+
+;; FMUL (multiple x single vector)
+;; Operands 0 and 1 are SVE_Fx24_NOBF register groups constrained to
+;; Uw<vector_count>; operand 2 is one <VSINGLE> vector with the "x"
+;; constraint.  Unlike the group operands, operand 2 is printed with an
+;; explicit .<Vetype> element suffix.  The pattern is only enabled when
+;; TARGET_STREAMING_SME2p2 holds.
+(define_insn "@aarch64_sve_<optab><mode>_single"
+ [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24_NOBF
+ [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "Uw<vector_count>")
+ (match_operand:<VSINGLE> 2 "register_operand" "x")]
+ SVE_FP_MUL))]
+ "TARGET_STREAMING_SME2p2"
+ "fmul\t%0, %1, %2.<Vetype>"
+)
+
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================
#define TARGET_STREAMING_SME2p1 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p1))
+/* True when both TARGET_STREAMING holds and the SME2p2 ISA feature is
+   available; used as the condition of the multi-vector FMUL patterns.  */
+#define TARGET_STREAMING_SME2p2 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p2))
+
#define TARGET_SME_B16B16 AARCH64_HAVE_ISA (SME_B16B16)
/* ARMv8.3-A features. */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat16x2_t: the multi-vector form
+   (svmul_f16_x2) and the multi x single form (svmul_single_f16_x2), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat16x2_t, z0,
+ svmul_f16_x2 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.h - z1\.h}, {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat16x2_t, z0,
+ svmul_f16_x2 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.h - z1\.h}, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat16x2_t, z0,
+ svmul_f16_x2 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** fmul {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat16x2_t, z18,
+ svmul_f16_x2 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z18, svfloat16x2_t, z23,
+ svmul_f16_x2 (z23, z18),
+ svmul (z23, z18))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat16x2_t, z28,
+ svmul_f16_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat16x2_t, z0,
+ svmul_f16_x2 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+** fmul {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat16x2_t, z4,
+ svmul_f16_x2 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmul_single_f16_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.h - z25\.h}, {z28\.h - z29\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmul_single_f16_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** (
+** mov z30\.d, z1\.d
+** mov z31\.d, z2\.d
+** |
+** mov z31\.d, z2\.d
+** mov z30\.d, z1\.d
+** )
+** fmul {z24\.h - z25\.h}, {z30\.h - z31\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24,
+ svmul_single_f16_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z30\.h - z31\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z2\.d, z31\.d
+** mov z1\.d, z30\.d
+** |
+** mov z1\.d, z30\.d
+** mov z2\.d, z31\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmul_single_f16_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1,
+ svmul_single_f16_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** fmul {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18,
+ svmul_single_f16_x2 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.h - z1\.h}, {z30\.h - z31\.h}, z[0-9]+\.h
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat16x2_t, svfloat16_t,
+ z0_res = svmul_single_f16_x2 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat16x2_t, svfloat16_t,
+ z0 = svmul_single_f16_x2 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24,
+ svmul_single_f16_x2 (z24, z16),
+ svmul (z24, z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat16x4_t: the multi-vector form
+   (svmul_f16_x4) and the multi x single form (svmul_single_f16_x4), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat16x4_t, z0,
+ svmul_f16_x4 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.h - z3\.h}, {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat16x4_t, z0,
+ svmul_f16_x4 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.h - z3\.h}, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat16x4_t, z0,
+ svmul_f16_x4 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat16x4_t, z18,
+ svmul_f16_x4 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z28, svfloat16x4_t, z23,
+ svmul_f16_x4 (z23, z28),
+ svmul (z23, z28))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat16x4_t, z28,
+ svmul_f16_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat16x4_t, z0,
+ svmul_f16_x4 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fmul {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat16x4_t, z4,
+ svmul_f16_x4 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmul_single_f16_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.h - z27\.h}, {z28\.h - z31\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmul_single_f16_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z24\.h - z27\.h}, {z28\.h - z31\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24,
+ svmul_single_f16_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z28\.h - z31\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmul_single_f16_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1,
+ svmul_single_f16_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18,
+ svmul_single_f16_x4 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.h - z3\.h}, {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat16x4_t, svfloat16_t,
+ z0_res = svmul_single_f16_x4 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat16x4_t, svfloat16_t,
+ z0 = svmul_single_f16_x4 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24,
+ svmul_single_f16_x4 (z24, z16),
+ svmul (z24, z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat32x2_t: the multi-vector form
+   (svmul_f32_x2) and the multi x single form (svmul_single_f32_x2), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat32x2_t, z0,
+ svmul_f32_x2 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.s - z1\.s}, {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat32x2_t, z0,
+ svmul_f32_x2 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.s - z1\.s}, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat32x2_t, z0,
+ svmul_f32_x2 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** fmul {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat32x2_t, z18,
+ svmul_f32_x2 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z18, svfloat32x2_t, z23,
+ svmul_f32_x2 (z23, z18),
+ svmul (z23, z18))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat32x2_t, z28,
+ svmul_f32_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat32x2_t, z0,
+ svmul_f32_x2 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+** fmul {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat32x2_t, z4,
+ svmul_f32_x2 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmul_single_f32_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.s - z25\.s}, {z28\.s - z29\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmul_single_f32_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** (
+** mov z30\.d, z1\.d
+** mov z31\.d, z2\.d
+** |
+** mov z31\.d, z2\.d
+** mov z30\.d, z1\.d
+** )
+** fmul {z24\.s - z25\.s}, {z30\.s - z31\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24,
+ svmul_single_f32_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z30\.s - z31\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z2\.d, z31\.d
+** mov z1\.d, z30\.d
+** |
+** mov z1\.d, z30\.d
+** mov z2\.d, z31\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmul_single_f32_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1,
+ svmul_single_f32_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** fmul {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18,
+ svmul_single_f32_x2 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.s - z1\.s}, {z30\.s - z31\.s}, z[0-9]+\.s
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat32x2_t, svfloat32_t,
+ z0_res = svmul_single_f32_x2 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat32x2_t, svfloat32_t,
+ z0 = svmul_single_f32_x2 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24,
+ svmul_single_f32_x2 (z24, z16),
+ svmul (z24, z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat32x4_t: the multi-vector form
+   (svmul_f32_x4) and the multi x single form (svmul_single_f32_x4), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat32x4_t, z0,
+ svmul_f32_x4 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.s - z3\.s}, {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat32x4_t, z0,
+ svmul_f32_x4 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.s - z3\.s}, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat32x4_t, z0,
+ svmul_f32_x4 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat32x4_t, z18,
+ svmul_f32_x4 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z28, svfloat32x4_t, z23,
+ svmul_f32_x4 (z23, z28),
+ svmul (z23, z28))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat32x4_t, z28,
+ svmul_f32_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat32x4_t, z0,
+ svmul_f32_x4 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fmul {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat32x4_t, z4,
+ svmul_f32_x4 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmul_single_f32_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.s - z27\.s}, {z28\.s - z31\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmul_single_f32_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z24\.s - z27\.s}, {z28\.s - z31\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24,
+ svmul_single_f32_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z28\.s - z31\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmul_single_f32_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1,
+ svmul_single_f32_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18,
+ svmul_single_f32_x4 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.s - z3\.s}, {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat32x4_t, svfloat32_t,
+ z0_res = svmul_single_f32_x4 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat32x4_t, svfloat32_t,
+ z0 = svmul_single_f32_x4 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24,
+ svmul_single_f32_x4 (z24, z16),
+ svmul (z24, z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat64x2_t: the multi-vector form
+   (svmul_f64_x2) and the multi x single form (svmul_single_f64_x2), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat64x2_t, z0,
+ svmul_f64_x2 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.d - z1\.d}, {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat64x2_t, z0,
+ svmul_f64_x2 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.d - z1\.d}, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat64x2_t, z0,
+ svmul_f64_x2 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** fmul {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat64x2_t, z18,
+ svmul_f64_x2 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z18, svfloat64x2_t, z23,
+ svmul_f64_x2 (z23, z18),
+ svmul (z23, z18))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat64x2_t, z28,
+ svmul_f64_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat64x2_t, z0,
+ svmul_f64_x2 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** |
+** fmul {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat64x2_t, z4,
+ svmul_f64_x2 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmul_single_f64_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.d - z25\.d}, {z28\.d - z29\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmul_single_f64_x2 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** (
+** mov z30\.d, z1\.d
+** mov z31\.d, z2\.d
+** |
+** mov z31\.d, z2\.d
+** mov z30\.d, z1\.d
+** )
+** fmul {z24\.d - z25\.d}, {z30\.d - z31\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24,
+ svmul_single_f64_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z30\.d - z31\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z2\.d, z31\.d
+** mov z1\.d, z30\.d
+** |
+** mov z1\.d, z30\.d
+** mov z2\.d, z31\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmul_single_f64_x2 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1,
+ svmul_single_f64_x2 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** fmul {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18,
+ svmul_single_f64_x2 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.d - z1\.d}, {z30\.d - z31\.d}, z[0-9]+\.d
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat64x2_t, svfloat64_t,
+ z0_res = svmul_single_f64_x2 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat64x2_t, svfloat64_t,
+ z0 = svmul_single_f64_x2 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24,
+ svmul_single_f64_x2 (z24, z16),
+ svmul (z24, z16))
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* Code-generation tests for svmul on svfloat64x4_t: the multi-vector form
+   (svmul_f64_x4) and the multi x single form (svmul_single_f64_x4), each
+   also called through the overloaded svmul name.  The comment block in
+   front of every TEST_* invocation is the expected function body; it is
+   parsed by the test harness, so edit it only to change the expectation.  */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+sme2p2"
+
+/*
+** mul_z0_z0_z4:
+** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_XN (mul_z0_z0_z4, svfloat64x4_t, z0,
+ svmul_f64_x4 (z0, z4),
+ svmul (z0, z4))
+
+/*
+** mul_z0_z4_z0:
+** fmul {z0\.d - z3\.d}, {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (mul_z0_z4_z0, svfloat64x4_t, z0,
+ svmul_f64_x4 (z4, z0),
+ svmul (z4, z0))
+
+/*
+** mul_z0_z4_z28:
+** fmul {z0\.d - z3\.d}, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+** ret
+*/
+TEST_XN (mul_z0_z4_z28, svfloat64x4_t, z0,
+ svmul_f64_x4 (z4, z28),
+ svmul (z4, z28))
+
+/*
+** mul_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z18_z18_z4, svfloat64x4_t, z18,
+ svmul_f64_x4 (z18, z4),
+ svmul (z18, z4))
+
+/*
+** mul_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN (mul_z23_z23_z28, svfloat64x4_t, z23,
+ svmul_f64_x4 (z23, z28),
+ svmul (z23, z28))
+
+/*
+** mul_z28_z28_z0:
+** fmul {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_XN (mul_z28_z28_z0, svfloat64x4_t, z28,
+ svmul_f64_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_z0_z0_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z0_z0_z18, svfloat64x4_t, z0,
+ svmul_f64_x4 (z0, z18),
+ svmul (z0, z18))
+
+/*
+** mul_z4_z4_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fmul {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN (mul_z4_z4_z23, svfloat64x4_t, z4,
+ svmul_f64_x4 (z4, z23),
+ svmul (z4, z23))
+
+/*
+** mul_single_z24_z24_z0:
+** fmul {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmul_single_f64_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z24_z28_z0:
+** fmul {z24\.d - z27\.d}, {z28\.d - z31\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmul_single_f64_x4 (z28, z0),
+ svmul (z28, z0))
+
+/*
+** mul_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul {z24\.d - z27\.d}, {z28\.d - z31\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24,
+ svmul_single_f64_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z1_z24_z0:
+** fmul {z28\.d - z31\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmul_single_f64_x4 (z24, z0),
+ svmul (z24, z0))
+
+/*
+** mul_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1,
+ svmul_single_f64_x4 (z1, z0),
+ svmul (z1, z0))
+
+/*
+** mul_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmul [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18,
+ svmul_single_f64_x4 (z18, z0),
+ svmul (z18, z0))
+
+/*
+** mul_single_awkward:
+** ...
+** fmul {z0\.d - z3\.d}, {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat64x4_t, svfloat64_t,
+ z0_res = svmul_single_f64_x4 (z1, z0),
+ z0_res = svmul (z1, z0))
+
+/*
+** mul_single_z0_z0_z15:
+** ...
+** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svmul_single_f64_x4 (z0, z15),
+ z0 = svmul (z0, z15))
+
+/*
+** mul_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmul {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svmul_single_f64_x4 (z24, z16),
+ svmul (z24, z16))