&& IN_RANGE (INTVAL (base_or_step), -16, 15));
}
+/* Return true if SERIES is a constant vector that can be loaded using
+ an immediate SVE INDEX, considering both SVE and Advanced SIMD modes.
+ When returning true, store the base in *BASE_OUT and the step
+ in *STEP_OUT. */
+
+static bool
+aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
+{
+ rtx base, step;
+ if (!const_vec_series_p (series, &base, &step)
+ || !CONST_INT_P (base)
+ || !CONST_INT_P (step))
+ return false;
+
+ auto mode = GET_MODE (series);
+ auto elt_mode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
+ {
+ /* On big-endian targets, architectural lane 0 holds the last element
+ for Advanced SIMD and the first element for SVE; see the comment at
+ the head of aarch64-sve.md for details. This means that, from an SVE
+ point of view, an Advanced SIMD series goes from the last element to
+ the first. */
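+ /* For example, the Advanced SIMD V4SI series { 0, 1, 2, 3 } holds 3
+ in architectural lane 0, so from an SVE point of view it becomes the
+ series with base 0 + 3 * 1 = 3 and step -1. */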
+ auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
+ base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
+ step = gen_int_mode (-UINTVAL (step), elt_mode);
+ }
+
+ if (!aarch64_sve_index_immediate_p (base)
+ || !aarch64_sve_index_immediate_p (step))
+ return false;
+
+ /* If the mode spans multiple registers, check that the base of each
+ subseries is also in range; the step is the same for all of them and
+ was checked above. */
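+ /* For example, with 256-bit SVE vectors, a VNx4DI series occupies two
+ registers of four elements each, so a series with base 11 and step 1
+ is acceptable only because the second register's base, 11 + 4 * 1 = 15,
+ is itself a valid INDEX immediate. */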
+ unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+ if (nvectors != 1)
+ {
+ unsigned int nunits;
+ if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
+ return false;
+ nunits /= nvectors;
+ for (unsigned int i = 1; i < nvectors; ++i)
+ if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
+ return false;
+ }
+
+ *base_out = base;
+ *step_out = step;
+ return true;
+}
+
/* Return true if X is a valid immediate for the SVE ADD and SUB instructions
when applied to mode MODE. Negate X first if NEGATE_P is true. */
n_elts = CONST_VECTOR_NPATTERNS (op);
else if (which == AARCH64_CHECK_MOV
&& TARGET_SVE
- && const_vec_series_p (op, &base, &step))
+ && aarch64_sve_index_series_p (op, &base, &step))
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (!aarch64_sve_index_immediate_p (base)
- || !aarch64_sve_index_immediate_p (step))
- return false;
-
if (info)
{
/* Get the corresponding container mode. E.g. an INDEX on V2SI
--- /dev/null
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
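+/* With -msve-vector-bits=256, VNx4DI occupies two SVE registers of four
+ doublewords each, so the eight-element series below requires two INDEX
+ instructions: one starting at 11 and one at 11 + 4 * 1 = 15. */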
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+ (function "foo"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (insn 3 (set (reg:VNx4DI <0>)
+ (const_vector:VNx4DI [(const_int 11)
+ (const_int 12)
+ (const_int 13)
+ (const_int 14)
+ (const_int 15)
+ (const_int 16)
+ (const_int 17)
+ (const_int 18)])))
+ (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+ (insn 5 (use (reg:VNx4DI v0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl (return_rtx (reg:VNx4DI v0)))
+ ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #11, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #15, #1\n} } } */
--- /dev/null
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
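+/* As above, but probing the bottom of the INDEX immediate range: the
+ second register's series starts at -16 + 4 * 1 = -12, which is still
+ representable. */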
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+ (function "foo"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (insn 3 (set (reg:VNx4DI <0>)
+ (const_vector:VNx4DI [(const_int -16)
+ (const_int -15)
+ (const_int -14)
+ (const_int -13)
+ (const_int -12)
+ (const_int -11)
+ (const_int -10)
+ (const_int -9)])))
+ (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+ (insn 5 (use (reg:VNx4DI v0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl (return_rtx (reg:VNx4DI v0)))
+ ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #-16, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #-12, #1\n} } } */
return svdupq_s32 (x, 1, 2, 3);
}
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
return svdupq_s32 (0, 1, x, 3);
}
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
/* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mlittle-endian" } */
/* { dg-final { check-function-bodies "**" "" "" } } */
typedef char v16qi __attribute__ ((vector_size (16)));
typedef short v4hi __attribute__ ((vector_size (8)));
typedef int v4si __attribute__ ((vector_size (16)));
typedef int v2si __attribute__ ((vector_size (8)));
-typedef long v2di __attribute__ ((vector_size (16)));
+typedef long long v2di __attribute__ ((vector_size (16)));
/*
** f_v16qi:
{
return (v4si){ 3, -1, -5, -9 };
}
+
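+/* The functions below probe the corners of the INDEX immediate range:
+ both the base and the step must be in [-16, 15] for INDEX to be used. */
+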
+/*
+** g_min_1:
+** index z0\.s, #-16, #1
+** ret
+*/
+v4si
+g_min_1 (void)
+{
+ return (v4si){ -16, -15, -14, -13 };
+}
+
+/*
+** g_min_min:
+** index z0\.s, #-16, #-16
+** ret
+*/
+v4si
+g_min_min (void)
+{
+ return (v4si){ -16, -32, -48, -64 };
+}
+
+/*
+** g_min_max:
+** index z0\.s, #-16, #15
+** ret
+*/
+v4si
+g_min_max (void)
+{
+ return (v4si){ -16, -1, 14, 29 };
+}
+
+/*
+** g_max_1:
+** index z0\.s, #15, #1
+** ret
+*/
+v4si
+g_max_1 (void)
+{
+ return (v4si){ 15, 16, 17, 18 };
+}
+
+/*
+** g_max_min:
+** index z0\.s, #15, #-16
+** ret
+*/
+v4si
+g_max_min (void)
+{
+ return (v4si){ 15, -1, -17, -33 };
+}
+
+/*
+** g_max_max:
+** index z0\.s, #15, #15
+** ret
+*/
+v4si
+g_max_max (void)
+{
+ return (v4si){ 15, 30, 45, 60 };
+}
+
+/*
+** g_ob_1:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_1 (void)
+{
+ return (v4si){ -17, -16, -15, -14 };
+}
+
+/*
+** g_ob_2:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_2 (void)
+{
+ return (v4si){ 16, 17, 18, 19 };
+}
+
+/*
+** g_ob_3:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_3 (void)
+{
+ return (v4si){ 0, -17, -34, -51 };
+}
+
+/*
+** g_ob_4:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_4 (void)
+{
+ return (v4si){ 0, 16, 32, 48 };
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
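+/* On big-endian targets, architectural lane 0 of an Advanced SIMD vector
+ holds the last element, so each series is described to INDEX with the
+ base and step reversed wrt the little-endian tests. */
+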
+/*
+** f_v16qi:
+** index z0\.b, #15, #-1
+** ret
+*/
+v16qi
+f_v16qi (void)
+{
+ return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+** index z0\.b, #7, #-1
+** ret
+*/
+v8qi
+f_v8qi (void)
+{
+ return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+** index z0\.h, #7, #-1
+** ret
+*/
+v8hi
+f_v8hi (void)
+{
+ return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+** index z0\.h, #3, #-1
+** ret
+*/
+v4hi
+f_v4hi (void)
+{
+ return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+** index z0\.s, #3, #-1
+** ret
+*/
+v4si
+f_v4si (void)
+{
+ return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+** index z0\.s, #1, #-1
+** ret
+*/
+v2si
+f_v2si (void)
+{
+ return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+** index z0\.d, #1, #-1
+** ret
+*/
+v2di
+f_v2di (void)
+{
+ return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+** index z0\.s, #-9, #4
+** ret
+*/
+v4si
+g_v4si (void)
+{
+ return (v4si){ 3, -1, -5, -9 };
+}
+
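+/* As in the little-endian test, the functions below probe the corners of
+ the INDEX immediate range [-16, 15], here with reversed element order. */
+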
+/*
+** g_min_1:
+** index z0\.s, #-16, #1
+** ret
+*/
+v4si
+g_min_1 (void)
+{
+ return (v4si){ -13, -14, -15, -16 };
+}
+
+/*
+** g_min_min:
+** index z0\.s, #-16, #-16
+** ret
+*/
+v4si
+g_min_min (void)
+{
+ return (v4si){ -64, -48, -32, -16 };
+}
+
+/*
+** g_min_max:
+** index z0\.s, #-16, #15
+** ret
+*/
+v4si
+g_min_max (void)
+{
+ return (v4si){ 29, 14, -1, -16 };
+}
+
+/*
+** g_max_1:
+** index z0\.s, #15, #1
+** ret
+*/
+v4si
+g_max_1 (void)
+{
+ return (v4si){ 18, 17, 16, 15 };
+}
+
+/*
+** g_max_min:
+** index z0\.s, #15, #-16
+** ret
+*/
+v4si
+g_max_min (void)
+{
+ return (v4si){ -33, -17, -1, 15 };
+}
+
+/*
+** g_max_max:
+** index z0\.s, #15, #15
+** ret
+*/
+v4si
+g_max_max (void)
+{
+ return (v4si){ 60, 45, 30, 15 };
+}
+
+/*
+** g_ob_1:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_1 (void)
+{
+ return (v4si){ -14, -15, -16, -17 };
+}
+
+/*
+** g_ob_2:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_2 (void)
+{
+ return (v4si){ 19, 18, 17, 16 };
+}
+
+/*
+** g_ob_3:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_3 (void)
+{
+ return (v4si){ -51, -34, -17, 0 };
+}
+
+/*
+** g_ob_4:
+** ((?!index).)*
+** ret
+*/
+v4si
+g_ob_4 (void)
+{
+ return (v4si){ 48, 32, 16, 0 };
+}