git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Restrict FCLAMP to SME2
author: Richard Sandiford <richard.sandiford@arm.com>
Fri, 8 Nov 2024 14:07:46 +0000 (14:07 +0000)
committer: Richard Sandiford <richard.sandiford@arm.com>
Fri, 8 Nov 2024 14:07:46 +0000 (14:07 +0000)
There are two sets of patterns for FCLAMP: one set for single registers
and one set for multiple registers.  The multiple-register set was
correctly gated on SME2, but the single-register set only required SME.
This doesn't matter for ACLE usage, since the intrinsic definitions
are correctly gated.  But it does matter for automatic generation of
FCLAMP from separate minimum and maximum operations (either ACLE
intrinsics or autovectorised code).

gcc/
* config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp<mode>)
(*aarch64_sve_fclamp<mode>_x): Require TARGET_STREAMING_SME2
rather than TARGET_STREAMING_SME.

gcc/testsuite/
* gcc.target/aarch64/sme/clamp_3.c: Force sme2.
* gcc.target/aarch64/sme/clamp_4.c: Likewise.
* gcc.target/aarch64/sme/clamp_5.c: New test.

(cherry picked from commit f5962839d6e0c3115931e68d938d9a0cd7a383b1)

gcc/config/aarch64/aarch64-sve2.md
gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c [new file with mode: 0644]

index 934e57055d3419e5dcc89b473fd110a0d4978b4f..bae153b2c8c3af229a6d05407276d6604d6b4e70 100644 (file)
             UNSPEC_FMAXNM)
           (match_operand:SVE_FULL_F 3 "register_operand")]
          UNSPEC_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
             UNSPEC_COND_FMAXNM)
           (match_operand:SVE_FULL_F 3 "register_operand")]
          UNSPEC_COND_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] #
      [     ?&w,  w, w, w; yes           ] #
index 44959f7949092c43ae7a25e7cdfd346629907be1..162de6224d58889c135b42f7bdbdfcb2710f38a9 100644 (file)
@@ -2,6 +2,8 @@
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)                                                     \
   TYPE                                                                 \
   tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
index 643b2635b90e83e2c8a35595cb1dce92bcedd006..453c82cd86057881b8cf487722d3e85c92246981 100644 (file)
@@ -2,6 +2,8 @@
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)                                                     \
   TYPE                                                                 \
   untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming                \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644 (file)
index 0000000..7c5464b
--- /dev/null
@@ -0,0 +1,24 @@
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE)                                                     \
+  TYPE                                                                 \
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
+  {                                                                    \
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c);  \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
+  {                                                                    \
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c);  \
+  }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */