git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Fix bogus cnot optimisation [PR114603]
author: Richard Sandiford <richard.sandiford@arm.com>
Fri, 16 Aug 2024 14:37:50 +0000 (15:37 +0100)
committer: Richard Sandiford <richard.sandiford@arm.com>
Fri, 16 Aug 2024 14:37:50 +0000 (15:37 +0100)
aarch64-sve.md had a pattern that combined:

cmpeq pb.T, pa/z, zc.T, #0
mov zd.T, pb/z, #1

into:

cnot zd.T, pa/m, zc.T

But this is only valid if pa.T is a ptrue.  In other cases, the
original would set inactive elements of zd.T to 0, whereas the
combined form would copy elements from zc.T.

gcc/
PR target/114603
* config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
with...
(@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
a ptrue.
(*cnot<mode>): Require operand 1 to be a ptrue.
* config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
with a ptrue.  Represent other _x operations as fully-defined _m
operations.

gcc/testsuite/
PR target/114603
* gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.

(cherry picked from commit 67cbb1c638d6ab3a9cb77e674541e2b291fb67df)

gcc/config/aarch64/aarch64-sve-builtins-base.cc
gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c [new file with mode: 0644]

index e5e0d6ed5c908051af27ae54b670ea92de7d8516..f96cb3ccc7bfce885c2643a84c50667e4e1b9673 100644 (file)
@@ -494,15 +494,22 @@ public:
   expand (function_expander &e) const OVERRIDE
   {
     machine_mode mode = e.vector_mode (0);
-    if (e.pred == PRED_x)
-      {
-       /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
-          a ptrue hint.  */
-       e.add_ptrue_hint (0, e.gp_mode (0));
-       return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
-      }
-
-    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
+    machine_mode pred_mode = e.gp_mode (0);
+    /* The underlying _x pattern is effectively:
+
+        dst = src == 0 ? 1 : 0
+
+       rather than an UNSPEC_PRED_X.  Using this form allows autovec
+       constructs to be matched by combine, but it means that the
+       predicate on the src == 0 comparison must be all-true.
+
+       For simplicity, represent other _x operations as fully-defined _m
+       operations rather than using a separate bespoke pattern.  */
+    if (e.pred == PRED_x
+       && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
+      return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
+    return e.use_cond_insn (code_for_cond_cnot (mode),
+                           e.pred == PRED_x ? 1 : 0);
   }
 };
 
index b8cc47ef5fcee84f9b4c6637f99a79ca632fab61..c68a3598423f4010b67b4797a810a25b63c4c051 100644 (file)
 ;; - CNOT
 ;; -------------------------------------------------------------------------
 
-;; Predicated logical inverse.
-(define_expand "@aarch64_pred_cnot<mode>"
+;; Logical inverse, predicated with a ptrue.
+(define_expand "@aarch64_ptrue_cnot<mode>"
   [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(unspec:<VPRED>
             [(match_operand:<VPRED> 1 "register_operand")
-             (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+             (const_int SVE_KNOWN_PTRUE)
              (eq:<VPRED>
-               (match_operand:SVE_FULL_I 3 "register_operand")
-               (match_dup 4))]
+               (match_operand:SVE_FULL_I 2 "register_operand")
+               (match_dup 3))]
             UNSPEC_PRED_Z)
-          (match_dup 5)
-          (match_dup 4)]
+          (match_dup 4)
+          (match_dup 3)]
          UNSPEC_SEL))]
   "TARGET_SVE"
   {
-    operands[4] = CONST0_RTX (<MODE>mode);
-    operands[5] = CONST1_RTX (<MODE>mode);
+    operands[3] = CONST0_RTX (<MODE>mode);
+    operands[4] = CONST1_RTX (<MODE>mode);
   }
 )
 
        (unspec:SVE_I
          [(unspec:<VPRED>
             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-             (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+             (const_int SVE_KNOWN_PTRUE)
              (eq:<VPRED>
                (match_operand:SVE_I 2 "register_operand" "0, w")
                (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
new file mode 100644 (file)
index 0000000..b1a489f
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** foo:
+**     cmpeq   (p[0-7])\.s, p0/z, z0\.s, #0
+**     mov     z0\.s, \1/z, #1
+**     ret
+*/
+svint32_t foo(svbool_t pg, svint32_t y)
+{
+  return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
+}
+
+#ifdef __cplusplus
+}
+#endif