git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: AND/BIC combines for unpacked SVE FP comparisons
author Spencer Abson <spencer.abson@arm.com>
Wed, 2 Jul 2025 21:31:40 +0000 (21:31 +0000)
committer Spencer Abson <spencer.abson@arm.com>
Tue, 15 Jul 2025 08:23:38 +0000 (08:23 +0000)
This patch extends the splitting patterns for combining FP comparisons
with predicated logical operations such that they cover all of SVE_F.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_and_combine):
Extend from SVE_FULL_F to SVE_F.
(*fcmuo<mode>_and_combine): Likewise.
(*fcm<cmp_op><mode>_bic_combine): Likewise.
(*fcm<cmp_op><mode>_nor_combine): Likewise.
(*fcmuo<mode>_bic_combine): Likewise.
(*fcmuo<mode>_nor_combine): Likewise.  Move the comment here to
above fcmuo<mode>_bic_combine, since it applies to both patterns.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/unpacked_fcm_combines_1.c: New test.
* gcc.target/aarch64/sve/unpacked_fcm_combines_2.c: Likewise.

gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c [new file with mode: 0644]

index 6b5113eb70fef0dbf4d8d3c8ea589224938da87f..10aecf1f19016fb036dfaca4762bc0507137bac2 100644 (file)
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (const_int SVE_KNOWN_PTRUE)
-            (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
-            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+            (match_operand:SVE_F 2 "register_operand" "w, w")
+            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
            SVE_COND_FP_CMP_I0)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
   "TARGET_SVE"
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (const_int SVE_KNOWN_PTRUE)
-            (match_operand:SVE_FULL_F 2 "register_operand" "w")
-            (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+            (match_operand:SVE_F 2 "register_operand" "w")
+            (match_operand:SVE_F 3 "register_operand" "w")]
            UNSPEC_COND_FCMUO)
          (match_operand:<VPRED> 4 "register_operand" "Upl")))]
   "TARGET_SVE"
              (unspec:<VPRED>
                [(match_operand:<VPRED> 1)
                 (const_int SVE_KNOWN_PTRUE)
-                (match_operand:SVE_FULL_F 2 "register_operand" "w")
-                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+                (match_operand:SVE_F 2 "register_operand" "w")
+                (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
                SVE_COND_FP_CMP_I0))
            (match_operand:<VPRED> 4 "register_operand" "Upa"))
          (match_dup:<VPRED> 1)))
              (unspec:<VPRED>
                [(match_operand:<VPRED> 1)
                 (const_int SVE_KNOWN_PTRUE)
-                (match_operand:SVE_FULL_F 2 "register_operand" "w")
-                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+                (match_operand:SVE_F 2 "register_operand" "w")
+                (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
                SVE_COND_FP_CMP_I0))
            (not:<VPRED>
              (match_operand:<VPRED> 4 "register_operand" "Upa")))
 }
 )
 
+;; Same for unordered comparisons.
 (define_insn_and_split "*fcmuo<mode>_bic_combine"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
              (unspec:<VPRED>
                [(match_operand:<VPRED> 1)
                 (const_int SVE_KNOWN_PTRUE)
-                (match_operand:SVE_FULL_F 2 "register_operand" "w")
-                (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+                (match_operand:SVE_F 2 "register_operand" "w")
+                (match_operand:SVE_F 3 "register_operand" "w")]
                UNSPEC_COND_FCMUO))
            (match_operand:<VPRED> 4 "register_operand" "Upa"))
          (match_dup:<VPRED> 1)))
 }
 )
 
-;; Same for unordered comparisons.
 (define_insn_and_split "*fcmuo<mode>_nor_combine"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
              (unspec:<VPRED>
                [(match_operand:<VPRED> 1)
                 (const_int SVE_KNOWN_PTRUE)
-                (match_operand:SVE_FULL_F 2 "register_operand" "w")
-                (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+                (match_operand:SVE_F 2 "register_operand" "w")
+                (match_operand:SVE_F 3 "register_operand" "w")]
                UNSPEC_COND_FCMUO))
            (not:<VPRED>
              (match_operand:<VPRED> 4 "register_operand" "Upa")))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c
new file mode 100644 (file)
index 0000000..d793a6c
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
+
+#include "unpacked_fcm_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 32 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 32 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 32 } } */
+
+/* Drop a PTRUE predicated AND with the loop mask and comparison result in
+   favour of predicating the comparison with the loop mask.  */
+/* { dg-final { scan-assembler-not {\tand\t} } } */
+
+/* Similarly, for codes that are implemented via an inversion, prefer
+   NOT (predicated with the loop mask) over BIC+PTRUE.  */
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-times {\tnot\t} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c
new file mode 100644 (file)
index 0000000..b85391b
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
+
+#include <stdint.h>
+
+/* Ensure that we still emit NOR here, rather than two NOTs.  */
+
+#define TEST_FCM_NOR(TYPE0, TYPE1, CMP, COUNT)         \
+  void                                                 \
+  f_##TYPE0##_##TYPE1##_##CMP (TYPE0 *__restrict out,  \
+                              TYPE1 *__restrict a,     \
+                              TYPE1 *__restrict b,     \
+                              TYPE1 *__restrict c)     \
+  {                                                                \
+    for (unsigned int i = 0; i < COUNT; i++)                       \
+      out[i] = !(CMP (a[i], c[i]) | CMP (b[i], c[i])) ? 3 : out[i]; \
+  }
+
+#define GT(A, B) ((A) > (B))
+
+TEST_FCM_NOR (uint64_t, float, GT, 32)
+TEST_FCM_NOR (uint64_t, _Float16, GT, 32)
+TEST_FCM_NOR (uint32_t, _Float16, GT, 64)
+
+TEST_FCM_NOR (uint64_t, float, __builtin_isunordered, 32)
+TEST_FCM_NOR (uint64_t, _Float16, __builtin_isunordered, 32)
+TEST_FCM_NOR (uint32_t, _Float16, __builtin_isunordered, 64)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-not {\tnot\t} } } */
+/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */