bool require_immediate_range (unsigned int, HOST_WIDE_INT,
HOST_WIDE_INT);
bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int);
+ bool require_immediate_lane_index (unsigned int, unsigned int);
bool check ();
return require_immediate_range (lane_argno, 0, nunits - 1);
}
+/* Require argument LANE_ARGNO to be an immediate lane index that selects
+   one element of argument VEC_ARGNO. Return true if the argument
+   is valid. */
+bool
+aarch64_pragma_builtins_checker::
+require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno)
+{
+  /* Delegate to the three-argument overload, passing VEC_ARGNO twice so
+     that the same argument both supplies the vector being indexed and
+     determines the valid range of lane numbers.  */
+  return require_immediate_lane_index (lane_argno, vec_argno, vec_argno);
+}
+
/* Check the arguments to the intrinsic call and return true if they
are valid. */
bool
case UNSPEC_FDOT_LANE_FP8:
return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
+ case UNSPEC_FMLALB_FP8:
+ case UNSPEC_FMLALT_FP8:
+ case UNSPEC_FMLALLBB_FP8:
+ case UNSPEC_FMLALLBT_FP8:
+ case UNSPEC_FMLALLTB_FP8:
+ case UNSPEC_FMLALLTT_FP8:
+ if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+ return require_immediate_lane_index (nargs - 2, nargs - 3);
+ else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+ return true;
+ else
+ gcc_unreachable ();
+
case UNSPEC_LUTI2:
case UNSPEC_LUTI4:
{
ops[0].mode, ops[3].mode);
break;
+ case UNSPEC_FMLALB_FP8:
+ case UNSPEC_FMLALT_FP8:
+ case UNSPEC_FMLALLBB_FP8:
+ case UNSPEC_FMLALLBT_FP8:
+ case UNSPEC_FMLALLTB_FP8:
+ case UNSPEC_FMLALLTT_FP8:
+ if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+ {
+ ops[4].value = aarch64_endian_lane_rtx (ops[3].mode,
+ INTVAL (ops[4].value));
+ icode = code_for_aarch64_lane (builtin_data.unspec,
+ ops[0].mode, ops[3].mode);
+ }
+ else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+ icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
+ else
+ gcc_unreachable ();
+ break;
+
case UNSPEC_LUTI2:
case UNSPEC_LUTI4:
create_integer_operand (ops.safe_push ({}),
aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
+ aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile);
+
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \
UNSPEC_FDOT_LANE_FP8, FP8)
+// Define the plain, _lane (64-bit index vector) and _laneq (128-bit
+// index vector) forms of the fp8 multiply-add intrinsic N, with result
+// element type T and unspec code U.
+#undef ENTRY_FMA_FPM
+#define ENTRY_FMA_FPM(N, T, U) \
+ ENTRY_TERNARY (N##q_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8) \
+ ENTRY_TERNARY_LANE (N##q_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U, FP8) \
+ ENTRY_TERNARY_LANE (N##q_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8)
+
// faminmax
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP)
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
ENTRY_VDOT_FPM (f32)
#undef REQUIRED_EXTENSIONS
+
+// fp8 multiply-add (FMLALB/FMLALT widening to f16; FMLALL* widening to f32)
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA)
+ENTRY_FMA_FPM (vmlalb, f16, UNSPEC_FMLALB_FP8)
+ENTRY_FMA_FPM (vmlalt, f16, UNSPEC_FMLALT_FP8)
+ENTRY_FMA_FPM (vmlallbb, f32, UNSPEC_FMLALLBB_FP8)
+ENTRY_FMA_FPM (vmlallbt, f32, UNSPEC_FMLALLBT_FP8)
+ENTRY_FMA_FPM (vmlalltb, f32, UNSPEC_FMLALLTB_FP8)
+ENTRY_FMA_FPM (vmlalltt, f32, UNSPEC_FMLALLTT_FP8)
+#undef REQUIRED_EXTENSIONS
""
"<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
)
+
+;; fpm fma instructions.
+;; fp8 widening multiply-add: FMLALB/FMLALT accumulating into a V8HF
+;; register.  Operand 0 is tied to the accumulator input (operand 1);
+;; the fp8 mode/scale state is an implicit input via FPM_REGNUM.
+(define_insn "@aarch64_<insn><mode>"
+ [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+ (unspec:V8HF_ONLY
+ [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")
+ (match_operand:V16QI 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FMLAL_FP8_HF))]
+ "TARGET_FP8FMA"
+ "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+;; Lane form of FMLALB/FMLALT: the second fp8 multiplicand is a single
+;; element of operand 3 (either a 64-bit or 128-bit vector, via VB),
+;; broadcast to all lanes.  The lane number is rewritten to the
+;; architectural (endian-adjusted) numbering when the template is output.
+(define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
+ [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+ (unspec:V8HF_ONLY
+ [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")
+ (vec_duplicate:V16QI
+ (vec_select:QI
+ (match_operand:VB 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand")])))
+ (reg:DI FPM_REGNUM)]
+ FMLAL_FP8_HF))]
+ "TARGET_FP8FMA"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+ INTVAL (operands[4]));
+ return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+ }
+)
+
+;; fp8 widening multiply-add into a V4SF accumulator
+;; (FMLALLBB/FMLALLBT/FMLALLTB/FMLALLTT).  Same operand layout as the
+;; half-precision pattern above: operand 0 tied to operand 1, FPMR
+;; implicit via FPM_REGNUM.
+(define_insn "@aarch64_<insn><mode>"
+ [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+ (unspec:V4SF_ONLY
+ [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")
+ (match_operand:V16QI 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FMLALL_FP8_SF))]
+ "TARGET_FP8FMA"
+ "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+;; Lane form of the FMLALL* pattern: one element of operand 3 is
+;; selected and broadcast, with the lane number endian-adjusted at
+;; output time, mirroring the V8HF lane pattern.
+(define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
+ [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+ (unspec:V4SF_ONLY
+ [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")
+ (vec_duplicate:V16QI
+ (vec_select:QI
+ (match_operand:VB 3 "register_operand" "w")
+ (parallel [(match_operand:SI 4 "immediate_operand")])))
+ (reg:DI FPM_REGNUM)]
+ FMLALL_FP8_SF))]
+ "TARGET_FP8FMA"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+ INTVAL (operands[4]));
+ return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+ }
+)
;; - FMLALLTT (indexed) (FP8FMA)
;; -------------------------------------------------------------------------
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
[(set (match_operand:VNx8HF_ONLY 0 "register_operand")
(unspec:VNx8HF_ONLY
[(match_operand:VNx8HF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
- SVE2_FP8_TERNARY_VNX8HF))]
+ FMLAL_FP8_HF))]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , 0 , w , w ; * ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
- [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
+ [ w , 0 , w , w ; * ] <insn>\t%0.h, %2.b, %3.b
+ [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b
}
)
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
(unspec:VNx4SF_ONLY
[(match_operand:VNx4SF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
- SVE2_FP8_TERNARY_VNX4SF))]
+ FMLALL_FP8_SF))]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , 0 , w , w ; * ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
- [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
+ [ w , 0 , w , w ; * ] <insn>\t%0.s, %2.b, %3.b
+ [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b
}
)
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
[(set (match_operand:VNx8HF_ONLY 0 "register_operand")
(unspec:VNx8HF_ONLY
[(match_operand:VNx8HF 1 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
- SVE2_FP8_TERNARY_LANE_VNX8HF))]
+ FMLAL_FP8_HF))]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , 0 , w , y ; * ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
- [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
+ [ w , 0 , w , y ; * ] <insn>\t%0.h, %2.b, %3.b[%4]
+ [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b[%4]
}
)
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
(unspec:VNx4SF_ONLY
[(match_operand:VNx4SF 1 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
- SVE2_FP8_TERNARY_LANE_VNX4SF))]
+ FMLALL_FP8_SF))]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , 0 , w , y ; * ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
- [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
+ [ w , 0 , w , y ; * ] <insn>\t%0.s, %2.b, %3.b[%4]
+ [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b[%4]
}
)
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator SI_ONLY [SI])
(define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator V8HF_ONLY [V8HF])
(define_mode_iterator V4SF_ONLY [V4SF])
;; Iterator for all integer modes (up to 64-bit)
UNSPEC_F1CVTLT
UNSPEC_F2CVTLT])
-(define_int_iterator SVE2_FP8_TERNARY_VNX8HF
+(define_int_iterator FMLAL_FP8_HF
[UNSPEC_FMLALB_FP8
UNSPEC_FMLALT_FP8])
-(define_int_iterator SVE2_FP8_TERNARY_VNX4SF
- [UNSPEC_FMLALLBB_FP8
- UNSPEC_FMLALLBT_FP8
- UNSPEC_FMLALLTB_FP8
- UNSPEC_FMLALLTT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX8HF
- [UNSPEC_FMLALB_FP8
- UNSPEC_FMLALT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX4SF
+(define_int_iterator FMLALL_FP8_SF
[UNSPEC_FMLALLBB_FP8
UNSPEC_FMLALLBT_FP8
UNSPEC_FMLALLTB_FP8
(UNSPEC_FCVTN_FP8 "fcvtn")
(UNSPEC_FDOT_FP8 "fdot")
(UNSPEC_FDOT_LANE_FP8 "fdot")
+ (UNSPEC_FMLALB_FP8 "fmlalb")
+ (UNSPEC_FMLALT_FP8 "fmlalt")
+ (UNSPEC_FMLALLBB_FP8 "fmlallbb")
+ (UNSPEC_FMLALLBT_FP8 "fmlallbt")
+ (UNSPEC_FMLALLTB_FP8 "fmlalltb")
+ (UNSPEC_FMLALLTT_FP8 "fmlalltt")
(UNSPEC_FSCALE "fscale")])
;; The optab associated with an operation. Note that for ANDF, IORF
(UNSPEC_F2CVT "f2cvt")
(UNSPEC_F1CVTLT "f1cvtlt")
(UNSPEC_F2CVTLT "f2cvtlt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx8hf
- [(UNSPEC_FMLALB_FP8 "fmlalb")
- (UNSPEC_FMLALT_FP8 "fmlalt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx4sf
- [(UNSPEC_FMLALLBB_FP8 "fmlallbb")
- (UNSPEC_FMLALLBT_FP8 "fmlallbt")
- (UNSPEC_FMLALLTB_FP8 "fmlalltb")
- (UNSPEC_FMLALLTT_FP8 "fmlalltt")])
#error Foo
#endif
+#pragma GCC target "arch=armv9-a+fp8fma"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
+
#pragma GCC target "arch=armv9-a+fp8dot4"
#ifndef __ARM_FEATURE_FP8
#error Foo
#endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
#ifndef __ARM_FEATURE_FP8DOT4
#error Foo
#endif
#ifndef __ARM_FEATURE_FP8
#error Foo
#endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
#ifndef __ARM_FEATURE_FP8DOT4
#error Foo
#endif
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8fma" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vmlalbq_f16_fpm:
+** msr fpmr, x0
+** fmlalb v0.8h, v1.16b, v2.16b
+** ret
+*/
+float16x8_t
+test_vmlalbq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalbq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlaltq_f16_fpm:
+** msr fpmr, x0
+** fmlalt v0.8h, v1.16b, v2.16b
+** ret
+*/
+float16x8_t
+test_vmlaltq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlaltq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbbq_f32_fpm:
+** msr fpmr, x0
+** fmlallbb v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vmlallbbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbtq_f32_fpm:
+** msr fpmr, x0
+** fmlallbt v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vmlallbtq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbtq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalltbq_f32_fpm:
+** msr fpmr, x0
+** fmlalltb v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vmlalltbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalltbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallttq_f32_fpm:
+** msr fpmr, x0
+** fmlalltt v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vmlallttq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallttq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_0:
+** msr fpmr, x0
+** fmlalb v0.8h, v1.16b, v2.b\[0\]
+** ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlalbq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_7:
+** msr fpmr, x0
+** fmlalb v0.8h, v1.16b, v2.b\[7\]
+** ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlalbq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_0:
+** msr fpmr, x0
+** fmlalb v0.8h, v1.16b, v2.b\[0\]
+** ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_15:
+** msr fpmr, x0
+** fmlalb v0.8h, v1.16b, v2.b\[15\]
+** ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_0:
+** msr fpmr, x0
+** fmlalt v0.8h, v1.16b, v2.b\[0\]
+** ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlaltq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_7:
+** msr fpmr, x0
+** fmlalt v0.8h, v1.16b, v2.b\[7\]
+** ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlaltq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_0:
+** msr fpmr, x0
+** fmlalt v0.8h, v1.16b, v2.b\[0\]
+** ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_15:
+** msr fpmr, x0
+** fmlalt v0.8h, v1.16b, v2.b\[15\]
+** ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_0:
+** msr fpmr, x0
+** fmlallbb v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_7:
+** msr fpmr, x0
+** fmlallbb v0.4s, v1.16b, v2.b\[7\]
+** ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fmlallbb v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_15:
+** msr fpmr, x0
+** fmlallbb v0.4s, v1.16b, v2.b\[15\]
+** ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_0:
+** msr fpmr, x0
+** fmlallbt v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_7:
+** msr fpmr, x0
+** fmlallbt v0.4s, v1.16b, v2.b\[7\]
+** ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fmlallbt v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_15:
+** msr fpmr, x0
+** fmlallbt v0.4s, v1.16b, v2.b\[15\]
+** ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_0:
+** msr fpmr, x0
+** fmlalltb v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_7:
+** msr fpmr, x0
+** fmlalltb v0.4s, v1.16b, v2.b\[7\]
+** ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fmlalltb v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_15:
+** msr fpmr, x0
+** fmlalltb v0.4s, v1.16b, v2.b\[15\]
+** ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_0:
+** msr fpmr, x0
+** fmlalltt v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallttq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_7:
+** msr fpmr, x0
+** fmlalltt v0.4s, v1.16b, v2.b\[7\]
+** ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vmlallttq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fmlalltt v0.4s, v1.16b, v2.b\[0\]
+** ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_15:
+** msr fpmr, x0
+** fmlalltt v0.4s, v1.16b, v2.b\[15\]
+** ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
--- /dev/null
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+fp8dot4+fp8dot2"
+
+void
+test(float16x4_t f16, float16x8_t f16q, float32x2_t f32,
+ float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x,
+ fpm_t fpm)
+{
+ vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+ vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+ vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+ vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+
+ vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+
+ vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+ vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+ vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+ vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+ vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+ vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+ vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+ vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+}