]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Use SVE2 BSL2N for vector EON
authorKyrylo Tkachov <ktkachov@nvidia.com>
Fri, 11 Jul 2025 14:23:16 +0000 (07:23 -0700)
committerKyrylo Tkachov <ktkachov@nvidia.com>
Wed, 16 Jul 2025 07:27:17 +0000 (09:27 +0200)
SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
(x & z) | (~x & ~z) which is ~(x ^ z).
Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
Advanced SIMD and SVE modes when TARGET_SVE2.
This patch does that.
For code like:

uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }

We now generate:
eon_q:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

eon_z:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

instead of the previous:
eon_q:
        eor     v0.16b, v0.16b, v1.16b
        not     v0.16b, v0.16b
        ret

eon_z:
        eor     z0.d, z0.d, z1.d
        ptrue   p3.b, all
        not     z0.d, p3/m, z0.d
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>
gcc/

* config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
New pattern.
(*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.

gcc/testsuite/

* gcc.target/aarch64/sve2/eon_bsl2n.c: New test.

gcc/config/aarch64/aarch64-sve2.md
gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c [new file with mode: 0644]

index 7148f54b363f1fca5edb46ccf1122236f8fc54a1..8c03e28cb0840a9390fd5afeb45bddf1846b625e 100644 (file)
   }
 )
 
+;; Vector EON (~(x, y)) using BSL2N.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+       (unspec:SVE_FULL_I
+         [(match_operand 3)
+          (not:SVE_FULL_I
+            (xor:SVE_FULL_I
+               (match_operand:SVE_FULL_I 1 "register_operand")
+               (match_operand:SVE_FULL_I 2 "register_operand")))]
+           UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%0.d, %0.d, %0.d, %2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d
+  }
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+       (not:VDQ_I
+         (xor:VDQ_I
+           (match_operand:VDQ_I 1 "register_operand")
+           (match_operand:VDQ_I 2 "register_operand"))))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Shift-and-accumulate operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
new file mode 100644 (file)
index 0000000..74b4637
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#define EON(x, y)   (~((x) ^ (y)))
+
+/*
+** eon_d:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_d_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_q:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_q_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_z:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
+
+/*
+** eon_z_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }