Broadly speaking, these tests were failing because of the BB limitation for
SLP'ing loads: loads in an || in an early break end up in different BBs, and so
today we can't SLP them. This results in load_lanes being required to vectorize
them, because the alternative is loads with permutes, which we don't allow.
The original checks were only checking partial vectors, which ended up working
because e.g. Adv. SIMD isn't a partial vector target, so it failed, and SVE is
a partial vector target but also has load lanes, so it passed.
GCN however is a partial vector target without load lanes which makes the tests
fail. As we require load_lanes for now, also check for them.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 with no issues.
Cross-checked the failing cases on amdgcn-amdhsa
and all pass now.
gcc/testsuite/ChangeLog:
PR target/119286
* gcc.dg/vect/bb-slp-41.c: Add pragma novector.
* gcc.dg/vect/vect-early-break_133_pfa11.c: Should never vectorize today
as indexes can be out of range.
* gcc.dg/vect/vect-early-break_128.c: Require load_lanes as well.
* gcc.dg/vect/vect-early-break_133_pfa10.c: Likewise.
* gcc.dg/vect/vect-early-break_133_pfa8.c: Likewise.
* gcc.dg/vect/vect-early-break_133_pfa9.c: Likewise.
* gcc.dg/vect/vect-early-break_22.c: Likewise.
* gcc.dg/vect/vect-early-break_26.c: Likewise.
* gcc.dg/vect/vect-early-break_43.c: Likewise.
* gcc.dg/vect/vect-early-break_44.c: Likewise.
* gcc.dg/vect/vect-early-break_6.c: Likewise.
* gcc.dg/vect/vect-early-break_56.c: Expect failures on group misalign.
foo (a1, b);
bar (a2, b);
+#pragma GCC novector
for (i = 0; i < ARR_SIZE; i++)
if (a1[i] != a2[i])
return 1;
/* { dg-require-effective-target vect_early_break } */
/* { dg-require-effective-target vect_int } */
-/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
#ifndef N
/* { dg-additional-options "-Ofast" } */
/* Alignment requirement too big, load lanes targets can't safely vectorize this. */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_partial_vectors || vect_load_lanes } } } } */
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! { vect_partial_vectors || vect_load_lanes } } } } } */
-/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" { target { ! { vect_partial_vectors || vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" { target { ! vect_load_lanes } } } } */
unsigned test4(char x, char *restrict vect_a, char *restrict vect_b, int n)
{
/* { dg-require-effective-target vect_early_break } */
/* { dg-require-effective-target vect_int } */
-/* Gathers and scatters are not save to speculate across early breaks. */
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */
+/* Gathers and scatters are not safe to speculate across early breaks. */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
#define N 1024
int vect_a[N];
/* { dg-additional-options "-Ofast" } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
char vect_a[1025];
char vect_b[1025];
/* { dg-additional-options "-Ofast" } */
/* Group size is uneven and second group is misaligned. Needs partial vectors. */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */
}
/* This will fail because we cannot SLP the load groups yet. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
}
/* This will fail because we cannot SLP the load groups yet. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
}
/* This will fail because we cannot SLP the load groups yet. */
-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
}
/* This will fail because we cannot SLP the load groups yet. */
-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 2 "vect" { xfail { vect_early_break && { ! vect_hw_misalign } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_early_break && { ! vect_hw_misalign } } } } } */
/* { dg-additional-options "-Ofast" } */
/* This will fail because we cannot SLP the load groups yet. */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
#define N 1024
unsigned vect_a[N];