From: Richard Henderson Date: Tue, 9 Jun 2026 19:21:04 +0000 (-0700) Subject: target/arm: Implement FMMLA (FP8 to FP32) for AdvSIMD X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bd61776fee728e14976e6ed13caabb5378ab4568;p=thirdparty%2Fqemu.git target/arm: Implement FMMLA (FP8 to FP32) for AdvSIMD Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20260609192110.752384-41-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index 1745989bce..be3db5300f 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -1645,6 +1645,11 @@ static inline bool isar_feature_aa64_f8dp2(const ARMISARegisters *id) return FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8DP2); } +/* Feature test: returns FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8MM8) converted to bool, for the ID register set passed in id. This commit uses it as the aa64_f8mm8 predicate of TRANS_FEAT(FMMLA_sb, ...) in translate-a64.c. NOTE(review): FIELD_EX64_IDREG is defined elsewhere; presumably it extracts the F8MM8 bitfield of ID_AA64FPFR0 -- confirm. */ static inline bool isar_feature_aa64_f8mm8(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64FPFR0, F8MM8); +} + /* * Combinations of feature tests, for ease of use with TRANS_FEAT. */ diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index a3e404e7fe..6922e91010 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -1227,6 +1227,8 @@ FMLALL_sb_v 0.00 1110 0.0 rm:5 110001 rn:5 rd:5 \ FDOT_sb_v 0.00 1110 000 ..... 11111 1 ..... ..... @qrrr_s FDOT_hb_v 0.00 1110 010 ..... 11111 1 ..... ..... @qrrr_h +FMMLA_sb 0110 1110 100 ..... 11101 1 ..... ..... @rrr_q1e0 + ### Advanced SIMD scalar x indexed element FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... 
@rrx_h diff --git a/target/arm/tcg/fp8_helper.c b/target/arm/tcg/fp8_helper.c index 065df24b84..b9d4ba3b6a 100644 --- a/target/arm/tcg/fp8_helper.c +++ b/target/arm/tcg/fp8_helper.c @@ -807,3 +807,28 @@ void HELPER(gvec_fdot_idx_hb)(void *vd, void *vn, void *vm, clear_tail(vd, oprsz, simd_maxsz(desc)); } +/* Helper for the AdvSIMD FMMLA (FP8 to FP32) instruction added by this commit. For every 16-byte segment of the operation size it produces four float32 results with f8dotadd_s, pairing the two 64-bit words of vn with the two 64-bit words of vm in the order (n[0],m[0]), (n[0],m[1]), (n[1],m[0]), (n[1],m[1]) and passing the current destination element as the fourth argument. Parameters: vd is the destination vector, also read as the accumulator input; vn and vm are the source vectors; env is the CPU state handed to fp8_mul_start; desc is the SIMD descriptor from which simd_oprsz and simd_maxsz are read. NOTE(review): f8dotadd_s is defined elsewhere; presumably it computes an 8-element FP8 dot product and adds it to the float32 accumulator -- confirm. */ +void HELPER(gvec_fmmla_sb)(void *vd, void *vn, void *vm, + CPUARMState *env, uint32_t desc) +{ + FP8MulContext ctx = fp8_mul_start(env, -1); /* NOTE(review): the meaning of the -1 argument is defined by fp8_mul_start, which is not visible here -- confirm */ + size_t oprsz = simd_oprsz(desc); + size_t nseg = oprsz / 16; /* number of whole 16-byte segments to process */ + uint64_t *n = vn; + uint64_t *m = vm; + float32 *d = vd; + /* Each iteration advances d by 4 elements and n, m by 2 elements. */ + for (size_t seg = 0; seg < nseg; seg++, d += 4, n += 2, m += 2) { + float32 d0 = f8dotadd_s(n[0], m[0], 8, d[H4(0)], &ctx); + float32 d1 = f8dotadd_s(n[0], m[1], 8, d[H4(1)], &ctx); + float32 d2 = f8dotadd_s(n[1], m[0], 8, d[H4(2)], &ctx); + float32 d3 = f8dotadd_s(n[1], m[1], 8, d[H4(3)], &ctx); + /* All four results are held in locals before any store to d; presumably this keeps the n and m inputs intact when vd overlaps vn or vm -- TODO confirm. */ + d[H4(0)] = d0; + d[H4(1)] = d1; + d[H4(2)] = d2; + d[H4(3)] = d3; + } + + clear_tail(vd, oprsz, simd_maxsz(desc)); /* presumably clears the destination bytes from oprsz up to simd_maxsz(desc); clear_tail is defined elsewhere */ +} diff --git a/target/arm/tcg/helper-fp8-defs.h b/target/arm/tcg/helper-fp8-defs.h index 5995d77577..3c74f02022 100644 --- a/target/arm/tcg/helper-fp8-defs.h +++ b/target/arm/tcg/helper-fp8-defs.h @@ -35,3 +35,5 @@ DEF_HELPER_FLAGS_5(gvec_fdot_idx_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, DEF_HELPER_FLAGS_5(gvec_fdot_hb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_5(gvec_fdot_idx_hb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_5(gvec_fmmla_sb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index e4c539fb18..ffe59b9471 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -7515,6 +7515,7 @@ static bool do_f8dot(DisasContext *s, arg_qrrr_e *a, TRANS_FEAT(FDOT_sb_v, aa64_f8dp4, do_f8dot, a, gen_helper_gvec_fdot_sb) TRANS_FEAT(FDOT_hb_v, aa64_f8dp2, do_f8dot, a, gen_helper_gvec_fdot_hb) +TRANS_FEAT(FMMLA_sb, aa64_f8mm8, do_f8dot, a, 
gen_helper_gvec_fmmla_sb) static bool do_f8dot_idx(DisasContext *s, arg_qrrx_e *a, gen_helper_gvec_3_ptr *fn)