From: Richard Henderson Date: Tue, 9 Jun 2026 19:21:03 +0000 (-0700) Subject: target/arm: Enable FEAT_FP8DOT2, FEAT_SSVE_FP8DOT2 for -cpu max X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c67d5463dabbf6418784fa0d4ac3a920c7b83aa2;p=thirdparty%2Fqemu.git target/arm: Enable FEAT_FP8DOT2, FEAT_SSVE_FP8DOT2 for -cpu max Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20260609192110.752384-40-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index f13721c327..0bcd707fc1 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -76,6 +76,7 @@ the following architecture extensions: - FEAT_FP (Floating Point extensions) - FEAT_FP16 (Half-precision floating-point data processing) - FEAT_FP8 (FP8 convert instructions) +- FEAT_FP8DOT2 (FP8 2-way dot product to half-precision instructions) - FEAT_FP8DOT4 (FP8 4-way dot product to single-precision instructions) - FEAT_FP8FMA (FP8 multiply-accumulate to half-precision and single-precision instructions) - FEAT_FPAC (Faulting on AUT* instructions) @@ -168,6 +169,7 @@ the following architecture extensions: - FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) - FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) - FEAT_SME_LUTv2 (Lookup table instructions with 4-bit indices and 8-bit elements) +- FEAT_SSVE_FP8DOT2 (SVE2 FP8 2-way dot product to half-precision instructions in Streaming SVE mode) - FEAT_SSVE_FP8DOT4 (SVE2 FP8 4-way dot product to single-precision instructions in Streaming SVE mode) - FEAT_SSVE_FP8FMA (SVE2 FP8 multiply-accumulate to half-precision and single-precision instructions in Streaming SVE mode) - FEAT_SVE (Scalable Vector Extension) diff --git a/linux-user/aarch64/elfload.c b/linux-user/aarch64/elfload.c index be33fede6b..d40e39169b 100644 --- a/linux-user/aarch64/elfload.c +++ b/linux-user/aarch64/elfload.c @@ -228,8 +228,10 @@ abi_ulong get_elf_hwcap2(CPUState *cs) ARM_HWCAP2_A64_F8E5M2); GET_FEATURE_ID(aa64_f8fma, ARM_HWCAP2_A64_F8FMA); GET_FEATURE_ID(aa64_f8dp4, ARM_HWCAP2_A64_F8DP4); + GET_FEATURE_ID(aa64_f8dp2, ARM_HWCAP2_A64_F8DP2); GET_FEATURE_ID(aa64_ssve_f8fma, ARM_HWCAP2_A64_SME_SF8FMA); GET_FEATURE_ID(aa64_ssve_f8dp4, ARM_HWCAP2_A64_SME_SF8DP4); + GET_FEATURE_ID(aa64_ssve_f8dp2, ARM_HWCAP2_A64_SME_SF8DP2); return hwcaps; } diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 831b190b21..3d73977f1e 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1383,6 +1383,7 @@ void aarch64_max_tcg_initfn(Object *obj) SET_IDREG(isar, ID_AA64DFR0, t); t = GET_IDREG(isar, ID_AA64SMFR0); + t = FIELD_DP64(t, ID_AA64SMFR0, SF8DP2, 1); /* FEAT_SSVE_FP8DOT2 */ t = FIELD_DP64(t, ID_AA64SMFR0, SF8DP4, 1); /* FEAT_SSVE_FP8DOT4 */ t = FIELD_DP64(t, ID_AA64SMFR0, SF8FMA, 1); /* FEAT_SSVE_FP8FMA */ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ @@ -1403,6 +1404,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = GET_IDREG(isar, ID_AA64FPFR0); t = FIELD_DP64(t, ID_AA64FPFR0, F8E5M2, 1); /* FEAT_FP8 */ t = FIELD_DP64(t, ID_AA64FPFR0, F8E4M3, 1); /* FEAT_FP8 */ + t = FIELD_DP64(t, ID_AA64FPFR0, F8DP2, 1); /* FEAT_FP8DOT2 */ t = FIELD_DP64(t, ID_AA64FPFR0, F8DP4, 1); /* FEAT_FP8DOT4 */ t = FIELD_DP64(t, ID_AA64FPFR0, F8FMA, 1); /* FEAT_FP8FMA */ t = FIELD_DP64(t, ID_AA64FPFR0, F8CVT, 1); /* FEAT_FP8 */