aarch64: Tweak handling of general SVE permutes [PR121027]

author Richard Sandiford <richard.sandiford@arm.com>

Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>

Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)
author Richard Sandiford <richard.sandiford@arm.com>
Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 10b8ed5d3874a2d866a84d7139732b8629b1db36..6e16763f9571d97a783e9de7a57b87f923efe06c 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -26960,12 +26960,23 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d)
  static bool
  aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
  {
-  unsigned HOST_WIDE_INT nelt;
+  if (!d->one_vector_p)
+    {
+      /* aarch64_expand_sve_vec_perm does not yet handle variable-length
+        vectors.  */
+      if (!d->perm.length ().is_constant ())
+       return false;
  
-  /* Permuting two variable-length vectors could overflow the
-     index range.  */
-  if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
-    return false;
+      /* This permutation reduces to the vec_perm optab if the elements are
+        large enough to hold all selector indices.  Do not handle that case
+        here, since the general TBL+SUB+TBL+ORR sequence is too expensive to
+        be considered a "native" constant permutation.
+
+        Handling it here would undermine code that queries can_vec_perm_const_p
+        with allow_variable_p set to false.  See PR121027.  */
+      if (selector_fits_mode_p (d->vmode, d->perm))
+       return false;
+    }
  
    if (d->testing_p)
      return true;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/perm_1.c

new file mode 100644 (file)

index 0000000..6b920b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/perm_1.c
@@ -0,0 +1,14 @@
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+#include <arm_sve.h>
+typedef svbfloat16_t vls_bfloat16_t __attribute__((arm_sve_vector_bits(32 * 8)));
+svbfloat16_t foo(vls_bfloat16_t a, vls_bfloat16_t b)
+{
+  svbfloat16_t zero = svreinterpret_bf16_f32 (svdup_n_f32 (0.0f));
+  return svzip2_bf16(zero, svuzp1_bf16(a,b));
+}
+
+
+/* { dg-final { scan-assembler-times {\tuzp1\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip2\t} 1 } } */
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
author	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 11 Jul 2025 15:48:41 +0000 (16:48 +0100)
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/acle/general/perm_1.c	[new file with mode: 0644]	patch \| blob