From 71e59ebefc25c50465e3fe5943a15c5da9120c8c Mon Sep 17 00:00:00 2001 From: Andrew Carlotti Date: Fri, 31 Jan 2025 05:07:30 +0000 Subject: [PATCH] aarch64: Support +sme+nosve permissively There is inconsistency regarding whether or not +sme implies +sve2 and whether +nosve2 implies +nosme. In particular, GCC 14 assumes the dependency exists, and canonicalises target strings accordingly, whereas LLVM treats the features as independent. This patch retains the positive implication (+sme still enables +sve2) while removing the negative implication (+nosve2 no longer implies +nosme). This is the more permissive choice in each case, and allows us to support target strings written with either interpretation in mind. This reduces our ability to detect invalid instructions, but we already can't rely on this detection because gas doesn't know whether functions might be executed in streaming mode and/or non-streaming mode. The aarch64_feature_enable_set change is functionally redundant within this patch. It is included because the longer term intention is to instead remove the workaround in aarch64_parse_features, once the internal feature checks have been modified to support having both AARCH64_FEATURE_SME set and AARCH64_FEATURE_SVE unset. Similarly, the dependency from +sme to +fp16 is currently redundant, but this redundancy relies upon an incorrect dependency from +fcma to +fp16. This can be fixed in the future, but it might require modifying internal feature checks for a few FCMA instructions, so it's left unchanged for now. 
--- gas/config/tc-aarch64.c | 23 ++++++++++++++++++++++- gas/doc/c-aarch64.texi | 4 ++-- gas/testsuite/gas/aarch64/sme-nosve-1.d | 15 +++++++++++++++ gas/testsuite/gas/aarch64/sme-nosve-1.s | 3 +++ 4 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 gas/testsuite/gas/aarch64/sme-nosve-1.d create mode 100644 gas/testsuite/gas/aarch64/sme-nosve-1.s diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c index b40886dc08c..122ff49ca1a 100644 --- a/gas/config/tc-aarch64.c +++ b/gas/config/tc-aarch64.c @@ -10715,7 +10715,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = { {"sve2-bitperm", AARCH64_FEATURE (SVE2_BITPERM), AARCH64_FEATURE (SVE2)}, {"sme", AARCH64_FEATURE (SME), - AARCH64_FEATURES (2, SVE2, BFLOAT16)}, + AARCH64_FEATURES (3, BFLOAT16, F16, COMPNUM)}, {"sme-f64", AARCH64_FEATURE (SME_F64F64), AARCH64_FEATURE (SME)}, {"sme-f64f64", AARCH64_FEATURE (SME_F64F64), AARCH64_FEATURE (SME)}, {"sme-i64", AARCH64_FEATURE (SME_I16I64), AARCH64_FEATURE (SME)}, @@ -10837,6 +10837,13 @@ aarch64_feature_enable_set (aarch64_feature_set set) for (opt = aarch64_features; opt->name != NULL; opt++) if (AARCH64_CPU_HAS_ALL_FEATURES (set, opt->value)) AARCH64_MERGE_FEATURE_SETS (set, set, opt->require); + /* As a special case, we want +sme to imply +sve2, without letting + +nosve2 imply +nosme. This is to ensure maximum compatibility with + both toolchains that assume this dependency and those that don't. */ + aarch64_feature_set sme = AARCH64_FEATURE (SME); + aarch64_feature_set sve2 = AARCH64_FEATURE (SVE2); + if (AARCH64_CPU_HAS_ALL_FEATURES (set, sme)) + AARCH64_MERGE_FEATURE_SETS (set, set, sve2); } return set; } @@ -10941,6 +10948,20 @@ aarch64_parse_features (const char *str, const aarch64_feature_set **opt_p, str = ext; }; + /* The special handling in aarch64_feature_enable_set ought to be sufficient + to accommodate uncertainty over whether or not +sme in a target string + implies +sve2. 
Unfortunately, many streaming SVE instructions are + currently marked as requiring SVE or SVE2, and some parsing and error + reporting decisions also depend on SVE or SVE2 being specified. So for + now we will reenable the SVE and SVE2 bits if SME is enabled. This allows + us to support, for example, a compiler passing the command line + `-march=armv9-a+sme+nosve` and expecting all SME instructions to remain + enabled. */ + aarch64_feature_set sme = AARCH64_FEATURE (SME); + aarch64_feature_set sve_sve2 = AARCH64_FEATURES (2, SVE, SVE2); + if (AARCH64_CPU_HAS_ALL_FEATURES (*ext_set, sme)) + AARCH64_MERGE_FEATURE_SETS (*ext_set, *ext_set, sve_sve2); + *ext_set = aarch64_update_virtual_dependencies (*ext_set); return 1; } diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi index 1dbefde26de..10888d1e78f 100644 --- a/gas/doc/c-aarch64.texi +++ b/gas/doc/c-aarch64.texi @@ -267,8 +267,8 @@ automatically cause those extensions to be disabled. @tab Enable Advanced SIMD extensions. @item @code{sm4} @tab @code{simd} @tab Enable the SM3 and SM4 cryptographic extensions. -@item @code{sme} @tab @code{sve2}, @code{bf16} - @tab Enable the Scalable Matrix Extension. +@item @code{sme} @tab @code{bf16}, @code{fp16}, @code{fcma} + @tab Enable the Scalable Matrix Extension. This will also enable @code{sve2}, but disabling @code{sve2} does not disable @code{sme}. @item @code{sme-b16b16} @tab @code{sme2}, @code{sve-b16b16} @tab Enable SME ZA-targeting non-widening BFloat16 instructions. 
@item @code{sme-f8f16} @tab @code{sme2}, @code{fp8} diff --git a/gas/testsuite/gas/aarch64/sme-nosve-1.d b/gas/testsuite/gas/aarch64/sme-nosve-1.d new file mode 100644 index 00000000000..39abbe69067 --- /dev/null +++ b/gas/testsuite/gas/aarch64/sme-nosve-1.d @@ -0,0 +1,15 @@ +#as: -march=armv8-a+sme +#as: -march=armv8-a+sme+nosve +#as: -march=armv8-a+sme+nosve2 +#as: -march=armv9-a+sme+nosve +#objdump: -dr + +.*: file format .* + + +Disassembly of section \.text: + +0+ <\.text>: + *[0-9a-f]+: 04d6a441 abs z1\.d, p1/m, z2\.d + *[0-9a-f]+: 4503d041 adclb z1\.s, z2\.s, z3\.s + *[0-9a-f]+: c0904460 addha za0\.s, p1/m, p2/m, z3\.s diff --git a/gas/testsuite/gas/aarch64/sme-nosve-1.s b/gas/testsuite/gas/aarch64/sme-nosve-1.s new file mode 100644 index 00000000000..e41d791f2e4 --- /dev/null +++ b/gas/testsuite/gas/aarch64/sme-nosve-1.s @@ -0,0 +1,3 @@ +abs z1.d, p1/m, z2.d +adclb z1.s, z2.s, z3.s +addha za0.s, p1/m, p2/m, z3.s -- 2.39.5