aarch64: Some fixes for SVE INDEX constants

author Richard Sandiford <richard.sandiford@arm.com>

Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>

Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 7960b639f903c77b45fb43757baa129996f043de..bc28f1c584d2532c01fbefa73dbfe2013594444d 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23074,6 +23074,58 @@ aarch64_sve_index_immediate_p (rtx base_or_step)
           && IN_RANGE (INTVAL (base_or_step), -16, 15));
  }
  
+/* Return true if SERIES is a constant integer vector series that can be
+   loaded using an immediate SVE INDEX, considering both SVE and Advanced
+   SIMD modes.  When returning true, store the INDEX base in *BASE_OUT and
+   the step in *STEP_OUT, both expressed in SVE lane order.  */
+
+static bool
+aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
+{
+  rtx base, step;
+  if (!const_vec_series_p (series, &base, &step)
+      || !CONST_INT_P (base)
+      || !CONST_INT_P (step))
+    return false;
+
+  auto mode = GET_MODE (series);
+  auto elt_mode = as_a<scalar_int_mode> (GET_MODE_INNER (mode));
+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+  if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
+    {
+      /* On big-endian targets, architectural lane 0 holds the last element
+        for Advanced SIMD and the first element for SVE; see the comment at
+        the head of aarch64-sve.md for details.  From an SVE point of view,
+        an Advanced SIMD series therefore runs from the last element to the
+        first, so rebase on the last element and negate the step.  */
+      auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
+      base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
+      step = gen_int_mode (-UINTVAL (step), elt_mode);
+    }
+
+  if (!aarch64_sve_index_immediate_p (base)
+      || !aarch64_sve_index_immediate_p (step))
+    return false;
+
+  /* If the mode spans multiple registers, check that the first element of
+     each later register's subseries is also in the range [-16, 15].  */
+  unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+  if (nvectors != 1)
+    {
+      unsigned int nunits;
+      if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
+       return false;
+      nunits /= nvectors;
+      for (unsigned int i = 1; i < nvectors; ++i)
+       if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
+         return false;
+    }
+
+  *base_out = base;
+  *step_out = step;
+  return true;
+}
+
  /* Return true if X is a valid immediate for the SVE ADD and SUB instructions
     when applied to mode MODE.  Negate X first if NEGATE_P is true.  */
  
@@ -23522,13 +23574,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
      n_elts = CONST_VECTOR_NPATTERNS (op);
    else if (which == AARCH64_CHECK_MOV
            && TARGET_SVE
-          && const_vec_series_p (op, &base, &step))
+          && aarch64_sve_index_series_p (op, &base, &step))
      {
-      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
-      if (!aarch64_sve_index_immediate_p (base)
-         || !aarch64_sve_index_immediate_p (step))
-       return false;
-
        if (info)
         {
           /* Get the corresponding container mode.  E.g. an INDEX on V2SI
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c

new file mode 100644 (file)

index 0000000..6f795c6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+  (function "foo"
+    (insn-chain
+      (block 2
+       (edge-from entry (flags "FALLTHRU"))
+       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+        (cnote 2 NOTE_INSN_FUNCTION_BEG)
+       (insn 3 (set (reg:VNx4DI <0>)
+                    (const_vector:VNx4DI [(const_int 11)
+                                          (const_int 12)
+                                          (const_int 13)
+                                          (const_int 14)
+                                          (const_int 15)
+                                          (const_int 16)
+                                          (const_int 17)
+                                          (const_int 18)])))
+       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+        (insn 5 (use (reg:VNx4DI v0)))
+       (edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+    (crtl (return_rtx (reg:VNx4DI v0)))
+  ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #11, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #15, #1\n} } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c

new file mode 100644 (file)

index 0000000..17e46cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+  (function "foo"
+    (insn-chain
+      (block 2
+       (edge-from entry (flags "FALLTHRU"))
+       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+        (cnote 2 NOTE_INSN_FUNCTION_BEG)
+       (insn 3 (set (reg:VNx4DI <0>)
+                    (const_vector:VNx4DI [(const_int -16)
+                                          (const_int -15)
+                                          (const_int -14)
+                                          (const_int -13)
+                                          (const_int -12)
+                                          (const_int -11)
+                                          (const_int -10)
+                                          (const_int -9)])))
+       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+        (insn 5 (use (reg:VNx4DI v0)))
+       (edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+    (crtl (return_rtx (reg:VNx4DI v0)))
+  ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #-16, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #-12, #1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c

index 218a6601337544c3c1ead1fa115b49761c545f34..13ebb9fd6fee7bd07c58c3814b31c3b96f252df9 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -10,6 +10,6 @@ dupq (int x)
    return svdupq_s32 (x, 1, 2, 3);
  }
  
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c

index cbee6f27b62f4dad139d4a7c789bad9e8eb455b8..13d27e2781d1b4b3155afdf5ec1374bf5b06701d 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -10,6 +10,6 @@ dupq (int x)
    return svdupq_s32 (0, 1, x, 3);
  }
  
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c

index 25910dbfa1fb32adb6ccc7c53e2c6a658dec0de7..5100a87c0d930ecc14c6baffd6f57fab059bbccf 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mlittle-endian" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  
  typedef char v16qi __attribute__ ((vector_size (16)));
@@ -8,7 +8,7 @@ typedef short v8hi __attribute__ ((vector_size (16)));
  typedef short v4hi __attribute__ ((vector_size (8)));
  typedef int v4si __attribute__ ((vector_size (16)));
  typedef int v2si __attribute__ ((vector_size (8)));
-typedef long v2di __attribute__ ((vector_size (16)));
+typedef long long v2di __attribute__ ((vector_size (16)));
  
  /*
  ** f_v16qi:
@@ -97,3 +97,113 @@ g_v4si (void)
  {
    return (v4si){ 3, -1, -5, -9 };
  }
+
+/*
+** g_min_1:
+**     index   z0\.s, #-16, #1
+**     ret
+*/
+v4si
+g_min_1 (void)
+{
+  return (v4si){ -16, -15, -14, -13 };
+}
+
+/*
+** g_min_min:
+**     index   z0\.s, #-16, #-16
+**     ret
+*/
+v4si
+g_min_min (void)
+{
+  return (v4si){ -16, -32, -48, -64 };
+}
+
+/*
+** g_min_max:
+**     index   z0\.s, #-16, #15
+**     ret
+*/
+v4si
+g_min_max (void)
+{
+  return (v4si){ -16, -1, 14, 29 };
+}
+
+/*
+** g_max_1:
+**     index   z0\.s, #15, #1
+**     ret
+*/
+v4si
+g_max_1 (void)
+{
+  return (v4si){ 15, 16, 17, 18 };
+}
+
+/*
+** g_max_min:
+**     index   z0\.s, #15, #-16
+**     ret
+*/
+v4si
+g_max_min (void)
+{
+  return (v4si){ 15, -1, -17, -33 };
+}
+
+/*
+** g_max_max:
+**     index   z0\.s, #15, #15
+**     ret
+*/
+v4si
+g_max_max (void)
+{
+  return (v4si){ 15, 30, 45, 60 };
+}
+
+/*
+** g_ob_1:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_1 (void)
+{
+  return (v4si){ -17, -16, -15, -14 };
+}
+
+/*
+** g_ob_2:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_2 (void)
+{
+  return (v4si){ 16, 17, 18, 19 };
+}
+
+/*
+** g_ob_3:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_3 (void)
+{
+  return (v4si){ 0, -17, -34, -51 };
+}
+
+/*
+** g_ob_4:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_4 (void)
+{
+  return (v4si){ 0, 16, 32, 48 };
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c

new file mode 100644 (file)

index 0000000..0681d95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
@@ -0,0 +1,209 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+/*
+** f_v16qi:
+**     index   z0\.b, #15, #-1
+**     ret
+*/
+v16qi
+f_v16qi (void)
+{
+  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+**     index   z0\.b, #7, #-1
+**     ret
+*/
+v8qi
+f_v8qi (void)
+{
+  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+**     index   z0\.h, #7, #-1
+**     ret
+*/
+v8hi
+f_v8hi (void)
+{
+  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+**     index   z0\.h, #3, #-1
+**     ret
+*/
+v4hi
+f_v4hi (void)
+{
+  return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+**     index   z0\.s, #3, #-1
+**     ret
+*/
+v4si
+f_v4si (void)
+{
+  return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+**     index   z0\.s, #1, #-1
+**     ret
+*/
+v2si
+f_v2si (void)
+{
+  return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+**     index   z0\.d, #1, #-1
+**     ret
+*/
+v2di
+f_v2di (void)
+{
+  return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+**     index   z0\.s, #-9, #4
+**     ret
+*/
+v4si
+g_v4si (void)
+{
+  return (v4si){ 3, -1, -5, -9 };
+}
+
+/*
+** g_min_1:
+**     index   z0\.s, #-16, #1
+**     ret
+*/
+v4si
+g_min_1 (void)
+{
+  return (v4si){ -13, -14, -15, -16 };
+}
+
+/*
+** g_min_min:
+**     index   z0\.s, #-16, #-16
+**     ret
+*/
+v4si
+g_min_min (void)
+{
+  return (v4si){ -64, -48, -32, -16 };
+}
+
+/*
+** g_min_max:
+**     index   z0\.s, #-16, #15
+**     ret
+*/
+v4si
+g_min_max (void)
+{
+  return (v4si){ 29, 14, -1, -16 };
+}
+
+/*
+** g_max_1:
+**     index   z0\.s, #15, #1
+**     ret
+*/
+v4si
+g_max_1 (void)
+{
+  return (v4si){ 18, 17, 16, 15 };
+}
+
+/*
+** g_max_min:
+**     index   z0\.s, #15, #-16
+**     ret
+*/
+v4si
+g_max_min (void)
+{
+  return (v4si){ -33, -17, -1, 15 };
+}
+
+/*
+** g_max_max:
+**     index   z0\.s, #15, #15
+**     ret
+*/
+v4si
+g_max_max (void)
+{
+  return (v4si){ 60, 45, 30, 15 };
+}
+
+/*
+** g_ob_1:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_1 (void)
+{
+  return (v4si){ -14, -15, -16, -17 };
+}
+
+/*
+** g_ob_2:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_2 (void)
+{
+  return (v4si){ 19, 18, 17, 16 };
+}
+
+/*
+** g_ob_3:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_3 (void)
+{
+  return (v4si){ -51, -34, -17, 0 };
+}
+
+/*
+** g_ob_4:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_4 (void)
+{
+  return (v4si){ 48, 32, 16, 0 };
+}
author	Richard Sandiford <richard.sandiford@arm.com>
	Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Wed, 9 Jul 2025 15:39:20 +0000 (16:39 +0100)
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c	[new file with mode: 0644]	patch \| blob