From: Sivan Shani Date: Wed, 17 Dec 2025 17:22:55 +0000 (+0000) Subject: AArch64 v9.7 extensions: FEAT_F16MM X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3eb520ce6a859d5deeb645215689595ccb848125;p=thirdparty%2Fbinutils-gdb.git AArch64 v9.7 extensions: FEAT_F16MM This patch includes: - Feature flag for FEAT_F16MM - Instructions: - FMMLA (non-widening) Half-precision matrix multiply-accumulate - FMMLA (non-widening) Floating-point matrix multiply-accumulate --- diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c index 6ceda91290c..916955b4177 100644 --- a/gas/config/tc-aarch64.c +++ b/gas/config/tc-aarch64.c @@ -10923,6 +10923,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = { {"sme2p3", AARCH64_FEATURE (SME2p3), AARCH64_FEATURES (2, SME2p2, SME_LUTv2)}, {"f16f32dot", AARCH64_FEATURE (F16F32DOT), AARCH64_FEATURE (SIMD)}, {"f16f32mm", AARCH64_FEATURE (F16F32MM), AARCH64_FEATURES (2, SIMD, F16)}, + {"f16mm", AARCH64_FEATURE (F16MM), AARCH64_FEATURES (2, SIMD, F16)}, {NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES}, }; diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi index c20e359c804..1ee3ae04b04 100644 --- a/gas/doc/c-aarch64.texi +++ b/gas/doc/c-aarch64.texi @@ -223,6 +223,8 @@ automatically cause those extensions to be disabled. @tab Enable Armv9.7 f16f32dot instructions. @item @code{f16f32mm} @tab @code{simd}, @code{fp16} @tab Enable Armv9.7 f16f32mm instructions. +@item @code{f16mm} @tab @code{simd}, @code{fp16} + @tab Enable Armv9.7 f16mm instructions. @item @code{fprcvt} @tab @code{fp} @tab Enable Armv9.6 fprcvt instructions. 
@item @code{frintts} @tab @code{fp} diff --git a/gas/testsuite/gas/aarch64/f16mm-simd.d b/gas/testsuite/gas/aarch64/f16mm-simd.d new file mode 100644 index 00000000000..0643339af61 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16mm-simd.d @@ -0,0 +1,12 @@ +#as: -march=armv8-a+f16mm +#objdump: -dr + +.*: file format .* + +Disassembly of section \.text: + +0+ <\.text>: + *[0-9a-f]+: 4ec0ec00 fmmla v0.8h, v0.8h, v0.8h + *[0-9a-f]+: 4ec0ec1f fmmla v31.8h, v0.8h, v0.8h + *[0-9a-f]+: 4ec0efe0 fmmla v0.8h, v31.8h, v0.8h + *[0-9a-f]+: 4edfec00 fmmla v0.8h, v0.8h, v31.8h diff --git a/gas/testsuite/gas/aarch64/f16mm-simd.s b/gas/testsuite/gas/aarch64/f16mm-simd.s new file mode 100644 index 00000000000..359cc207e23 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16mm-simd.s @@ -0,0 +1,4 @@ +fmmla v0.8h,v0.8h, v0.8h +fmmla v31.8h,v0.8h, v0.8h +fmmla v0.8h,v31.8h, v0.8h +fmmla v0.8h,v0.8h, v31.8h diff --git a/gas/testsuite/gas/aarch64/f16mm-sve2p2.d b/gas/testsuite/gas/aarch64/f16mm-sve2p2.d new file mode 100644 index 00000000000..d4d4a524399 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16mm-sve2p2.d @@ -0,0 +1,12 @@ +#as: -march=armv8-a+sve2p2+f16mm +#objdump: -dr + +.*: file format .* + +Disassembly of section \.text: + +0+ <\.text>: + *[0-9a-f]+: 64a0e000 fmmla z0.h, z0.h, z0.h + *[0-9a-f]+: 64a0e01f fmmla z31.h, z0.h, z0.h + *[0-9a-f]+: 64a0e3e0 fmmla z0.h, z31.h, z0.h + *[0-9a-f]+: 64bfe000 fmmla z0.h, z0.h, z31.h diff --git a/gas/testsuite/gas/aarch64/f16mm-sve2p2.s b/gas/testsuite/gas/aarch64/f16mm-sve2p2.s new file mode 100644 index 00000000000..59ec90a8961 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16mm-sve2p2.s @@ -0,0 +1,4 @@ +fmmla z0.h, z0.h, z0.h +fmmla z31.h, z0.h, z0.h +fmmla z0.h, z31.h, z0.h +fmmla z0.h, z0.h, z31.h diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h index d2de7efa765..e207677893d 100644 --- a/include/opcode/aarch64.h +++ b/include/opcode/aarch64.h @@ -275,6 +275,8 @@ enum aarch64_feature_bit { AARCH64_FEATURE_F16F32DOT, /* 
F16F32MM instructions. */ AARCH64_FEATURE_F16F32MM, + /* F16MM instructions. */ + AARCH64_FEATURE_F16MM, /* Virtual features. These are used to gate instructions that are enabled by either of two (or more) sets of command line flags. */ diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c index 3c043f897b4..e615fe1ed75 100644 --- a/opcodes/aarch64-dis-2.c +++ b/opcodes/aarch64-dis-2.c @@ -20249,10 +20249,20 @@ aarch64_opcode_lookup_1 (uint32_t word) { if (((word >> 31) & 0x1) == 0) { - /* 33222222222211111111110000000000 - 10987654321098765432109876543210 - 011001x0101xxxxx111xxxxxxxxxxxxx. */ - return A64_OPID_64a0e400_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16; + if (((word >> 10) & 0x1) == 0) + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 011001x0101xxxxx111xx0xxxxxxxxxx. */ + return A64_OPID_64a0e000_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16; + } + else + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 011001x0101xxxxx111xx1xxxxxxxxxx. */ + return A64_OPID_64a0e400_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16; + } } else { @@ -28972,10 +28982,20 @@ aarch64_opcode_lookup_1 (uint32_t word) } else { - /* 33222222222211111111110000000000 - 10987654321098765432109876543210 - 0x001110xx0xxxxxx11011xxxxxxxxxx. */ - return A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm; + if (((word >> 23) & 0x1) == 0) + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x0011100x0xxxxxx11011xxxxxxxxxx. */ + return A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm; + } + else + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x0011101x0xxxxxx11011xxxxxxxxxx. 
*/ + return A64_OPID_4ec0ec00_fmmla_Vd_Vn_Vm; + } } } } diff --git a/opcodes/aarch64-tbl-2.h b/opcodes/aarch64-tbl-2.h index aef23a03701..3c38021031e 100644 --- a/opcodes/aarch64-tbl-2.h +++ b/opcodes/aarch64-tbl-2.h @@ -4041,5 +4041,7 @@ enum aarch64_opcode_idx A64_OPID_0f409000_fdot_Vd_Vn_Em16, A64_OPID_0e80fc00_fdot_Vd_Vn_Vm, A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm, + A64_OPID_4ec0ec00_fmmla_Vd_Vn_Vm, + A64_OPID_64a0e000_fmmla_SVE_Zd_SVE_Zn_SVE_Zm_16, A64_OPID_MAX, }; diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h index 59aef335bbc..61f87bb9113 100644 --- a/opcodes/aarch64-tbl.h +++ b/opcodes/aarch64-tbl.h @@ -2817,6 +2817,12 @@ { \ QLF3(V_4S, V_8H, S_H), \ } + +/* e.g. FMMLA Vd.8h, Vn.8h, Vm.8h */ +#define QL_V3SAME8H \ +{ \ + QLF3 (V_8H, V_8H, V_8H), \ +} /* Opcode table. */ @@ -3080,6 +3086,10 @@ static const aarch64_feature_set aarch64_feature_f16f32dot = AARCH64_FEATURE (F16F32DOT); static const aarch64_feature_set aarch64_feature_f16f32mm = AARCH64_FEATURE (F16F32MM); +static const aarch64_feature_set aarch64_feature_f16mm = + AARCH64_FEATURE (F16MM); +static const aarch64_feature_set aarch64_feature_f16mm_sve2p2 = + AARCH64_FEATURES (2, F16MM, SVE2p2); #define CORE &aarch64_feature_v8 #define FP &aarch64_feature_fp @@ -3211,6 +3221,8 @@ static const aarch64_feature_set aarch64_feature_f16f32mm = #define SVE2p3_SME2p3 &aarch64_feature_sve2p3_sme2p3 #define F16F32DOT &aarch64_feature_f16f32dot #define F16F32MM &aarch64_feature_f16f32mm +#define F16MM &aarch64_feature_f16mm +#define F16MM_SVE2p2 &aarch64_feature_f16mm_sve2p2 #define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \ { NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS | F_INVALID_IMM_SYMS_1, 0, 0, NULL } @@ -3559,6 +3571,10 @@ static const aarch64_feature_set aarch64_feature_f16f32mm = { NAME, OPCODE, MASK, CLASS, 0, F16F32DOT, OPS, QUALS, FLAGS, 0, 0, NULL } #define F16F32MM_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ { NAME, OPCODE, MASK, CLASS, 0, F16F32MM, OPS, QUALS, FLAGS, 0, 
0, NULL } +#define F16MM_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ + { NAME, OPCODE, MASK, CLASS, 0, F16MM, OPS, QUALS, FLAGS, 0, 0, NULL } +#define F16MM_SVE2p2_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ + { NAME, OPCODE, MASK, CLASS, 0, F16MM_SVE2p2, OPS, QUALS, FLAGS | F_STRICT, 0, 0, NULL } #define MOPS_CPY_OP1_OP2_PME_INSN(NAME, OPCODE, MASK, FLAGS, CONSTRAINTS) \ MOPS_INSN (NAME, OPCODE, MASK, 0, \ @@ -7858,6 +7874,10 @@ const struct aarch64_opcode aarch64_opcode_table[] = /* F16F32MM instructions. */ F16F32MM_INSN ("fmmla", 0x4e40ec00, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_BFMMLA, F_SIZEQ), + /* F16MM instructions. */ + F16MM_INSN ("fmmla", 0x4ec0ec00, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3SAME8H, 0), + F16MM_SVE2p2_INSN ("fmmla", 0x64a0e000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0), + {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL}, };