Before this patch, we could vectorise:
for (...)
{
a[0] = ...;
a[1] = ...;
a[2] = ...;
a[3] = ...;
a += stride;
}
(including the case when stride == 8) but not:
for (...)
{
a[0] = ...;
a[1] = ...;
a[2] = ...;
a[3] = ...;
a += 8;
}
(where the stride is always 8). The former was treated as a "grouped
and strided" store, while the latter was treated as a grouped store
with gaps, which we don't support.
This patch makes us treat groups of stores with gaps at the end as
strided groups too. I went through all uses of STMT_VINFO_STRIDED_P
and all vectoriser uses of DR_STEP looking for baked-in assumptions
that this change would break, but couldn't find any. I wondered
whether we should relax:
/* We do not have to consider dependences between accesses that belong
to the same group, unless the stride could be smaller than the
group size. */
if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
&& (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
== DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
&& !STMT_VINFO_STRIDED_P (stmtinfo_a))
return false;
for cases in which the step is constant and the absolute step is known
to be greater than the group size, but data dependence analysis should
already return chrec_known for those cases.
The new test is a version of vect-avg-15.c with the variable step
replaced by a constant one.
A natural follow-on would be to do the same for groups with gaps in
the middle:
/* Check that the distance between two accesses is equal to the type
size. Otherwise, we have gaps. */
diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
- TREE_INT_CST_LOW (prev_init)) / type_size;
if (diff != 1)
{
[...]
if (DR_IS_WRITE (data_ref))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"interleaved store with gaps\n");
return false;
}
But I think we should do that separately and see what the fallout
from this change is first.
2018-08-22 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-vect-data-refs.c (vect_analyze_group_access_1): Convert
grouped stores with gaps to a strided group.
gcc/testsuite/
* gcc.dg/vect/vect-avg-16.c: New test.
* gcc.dg/vect/slp-37.c: Expect the loop to be vectorized.
* gcc.dg/vect/vect-strided-u8-i8-gap4.c,
* gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for
the second loop in main1.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263772 138bc75d-0d04-0410-961f-82ee72b054a4
+2018-08-22 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-vect-data-refs.c (vect_analyze_group_access_1): Convert
+ grouped stores with gaps to a strided group.
+
2018-08-22 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-stmts.c (get_group_load_store_type)
+2018-08-22 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/vect/vect-avg-16.c: New test.
+ * gcc.dg/vect/slp-37.c: Expect the loop to be vectorized.
+ * gcc.dg/vect/vect-strided-u8-i8-gap4.c,
+ * gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for
+ the second loop in main1.
+
2018-08-22 Iain Sandoe <iain@sandoe.co.uk>
* gcc.dg/lto/pr85248_0.c (test_alias):
int i;
s1 *ptr = arr;
- /* Different constant types - not SLPable. The group size is not power of 2,
- interleaving is not supported either. */
+ /* Vectorized as a strided SLP pair of accesses to <a, b> and a single
+ strided access to c. */
for (i = 0; i < N; i++)
{
ptr->a = 6;
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
-
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
--- /dev/null
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 80
+
+void __attribute__ ((noipa)) /* Write (b[i] + c[i]) >> 1 to the first 16 elements of each of n 20-element rows of a. */
+f (signed char *restrict a, signed char *restrict b,
+ signed char *restrict c, int n)
+{
+ for (int j = 0; j < n; ++j) /* One iteration per row. */
+ {
+ for (int i = 0; i < 16; ++i)
+ a[i] = (b[i] + c[i]) >> 1; /* Sum halved with a right shift, then stored back as signed char. */
+ a += 20; /* Constant step of 20 with only 16 stores: the last 4 elements of each row are a gap. */
+ b += 20;
+ c += 20;
+ }
+}
+
+#define BASE1 -126
+#define BASE2 -42
+
+signed char a[N], b[N], c[N];
+
+int
+main (void) /* Fill the arrays, run f over every row, and abort if any element of a is wrong. */
+{
+ check_vect (); /* From tree-vect.h; presumably checks that the host can run vector code -- TODO confirm. */
+
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = i; /* Sentinel value: elements f does not write must still equal i afterwards. */
+ b[i] = BASE1 + i * 3;
+ c[i] = BASE2 + i * 2; /* Hence b[i] + c[i] == BASE1 + BASE2 + i * 5. */
+ asm volatile ("" ::: "memory"); /* Empty asm with a memory clobber; presumably stops this setup loop being vectorized -- confirm. */
+ }
+ f (a, b, c, N / 20); /* N / 20 == 4 rows of 20 elements. */
+ for (int i = 0; i < N; ++i)
+ {
+ int d = (BASE1 + BASE2 + i * 5) >> 1; /* Expected value of (b[i] + c[i]) >> 1. */
+ if (a[i] != (i % 20 < 16 ? d : i)) /* First 16 of each row hold the average; the 4-element gap keeps its sentinel. */
+ __builtin_abort ();
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
}
ptr = arr;
- /* Not vectorizable: gap in store. */
+ /* Vectorized as a strided SLP pair. */
for (i = 0; i < N; i++)
{
res[i].a = ptr->b;
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */
}
ptr = arr;
- /* Not vectorizable: gap in store. */
+ /* Vectorized as a strided SLP pair. */
for (i = 0; i < N; i++)
{
res[i].a = ptr->b;
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */
if (groupsize != count
&& !DR_IS_READ (dr))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "interleaved store with gaps\n");
- return false;
+ groupsize = count;
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
}
/* If there is a gap after the last load in the group it is the
"Detected interleaving ");
if (DR_IS_READ (dr))
dump_printf (MSG_NOTE, "load ");
+ else if (STMT_VINFO_STRIDED_P (stmt_info))
+ dump_printf (MSG_NOTE, "strided store ");
else
dump_printf (MSG_NOTE, "store ");
dump_printf (MSG_NOTE, "of size %u starting with ",