--- /dev/null
+/* { dg-require-effective-target mmap } */
+
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 128
+#define MMAP_SIZE 0x20000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned int
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+/* Kernel under test.  In each of the COUNT iterations, read the two
+   elements y[2*i] and y[2*i + 1] and store four results to
+   x[4*i] .. x[4*i + 3]; each loaded element is used twice, with a
+   different addend every time.  In total COUNT * 2 elements of Y are
+   read and COUNT * 4 elements of X are written.  The noipa attribute
+   keeps the function opaque to interprocedural optimization.  */
+void __attribute__((noipa))
+loop (TYPE *restrict x, TYPE *restrict y)
+{
+ for (int i = 0; i < COUNT; ++i)
+ {
+ x[i * 4] = y[i * 2] + 1;
+ x[i * 4 + 1] = y[i * 2] + 2;
+ x[i * 4 + 2] = y[i * 2 + 1] + 3;
+ x[i * 4 + 3] = y[i * 2 + 1] + 4;
+ }
+}
+
+TYPE x[COUNT * 4];
+/* Map MMAP_SIZE bytes of anonymous memory, passing ADDRESS as the
+   requested placement, and run loop () with its Y argument pointing
+   COUNT * 2 elements before the end of the mapping.  The last element
+   loop () reads is thus the last element of the mapping, and any load
+   beyond it lies outside the mapping (presumably so that an over-read
+   by the compiled loop is detected -- confirm against the PR).
+   Returns 1 if mmap fails and 0 otherwise.  */
+int
+main (void)
+{
+ void *y;
+ TYPE *end_y;
+
+ y = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (y == MAP_FAILED)
+ {
+ perror ("mmap");
+ return 1;
+ }
+
+ end_y = (TYPE *) ((char *) y + MMAP_SIZE); /* One past the mapping.  */
+
+ loop (x, end_y - COUNT * 2); /* loop () reads exactly COUNT * 2 elements.  */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Data access with gaps requires scalar epilogue loop" "vect" { target { vect_perm && vect_int } } } } */
--- /dev/null
+/* { dg-require-effective-target mmap } */
+/* { dg-additional-options "-mssse3" { target x86_64-*-* i?86-*-* } } */
+
+#include <sys/mman.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define COUNT 128
+#define MMAP_SIZE 0x20000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+#define GROUP_SIZE 2
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+/* Kernel under test.  In each of the COUNT iterations, read the
+   GROUP_SIZE (2) elements y[2*i] and y[2*i + 1] and store eight
+   results to x[8*i] .. x[8*i + 7]; each loaded element is used four
+   times, with a different addend every time.  In total
+   COUNT * GROUP_SIZE elements of Y are read and COUNT * 8 elements of
+   X are written, so X must provide at least COUNT * 8 elements.  The
+   noipa attribute keeps the function opaque to interprocedural
+   optimization.  */
+void __attribute__((noipa))
+loop (TYPE *restrict x, TYPE *restrict y)
+{
+ for (int i = 0; i < COUNT; ++i)
+ {
+ x[i * 8] = y[i * GROUP_SIZE] + 1;
+ x[i * 8 + 1] = y[i * GROUP_SIZE] + 2;
+ x[i * 8 + 2] = y[i * GROUP_SIZE + 1] + 3;
+ x[i * 8 + 3] = y[i * GROUP_SIZE + 1] + 4;
+ x[i * 8 + 4] = y[i * GROUP_SIZE] + 5;
+ x[i * 8 + 5] = y[i * GROUP_SIZE] + 6;
+ x[i * 8 + 6] = y[i * GROUP_SIZE + 1] + 7;
+ x[i * 8 + 7] = y[i * GROUP_SIZE + 1] + 8;
+ }
+}
+
+/* Destination buffer.  loop () stores 8 elements in each of its COUNT
+   iterations (highest index written is COUNT * 8 - 1), so COUNT * 8
+   elements are required.  */
+TYPE x[COUNT * 8];
+/* Map MMAP_SIZE bytes of anonymous memory, passing ADDRESS as the
+   requested placement, and run loop () with its Y argument pointing
+   COUNT * GROUP_SIZE elements before the end of the mapping.  The
+   last element loop () reads is thus the last element of the mapping,
+   and any load beyond it lies outside the mapping (presumably so that
+   an over-read by the compiled loop is detected -- confirm against
+   the PR).  Returns 1 if mmap fails and 0 otherwise.  */
+int
+main (void)
+{
+ void *y;
+ TYPE *end_y;
+
+ check_vect (); /* From tree-vect.h; presumably checks target vector support -- confirm.  */
+
+ y = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (y == MAP_FAILED)
+ {
+ perror ("mmap");
+ return 1;
+ }
+
+ end_y = (TYPE *) ((char *) y + MMAP_SIZE); /* One past the mapping.  */
+
+ loop (x, end_y - COUNT * GROUP_SIZE); /* loop () reads exactly COUNT * GROUP_SIZE elements.  */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "peeling for gaps insufficient for access" "vect" { target { vect_perm_short } } } } */
gcc_assert (!loop_vinfo || cmp > 0);
*memory_access_type = VMAT_CONTIGUOUS;
}
+
+ /* When we have a contiguous access across loop iterations
+ but the access in the loop doesn't cover the full vector
+ we can end up with no gap recorded but still excess
+ elements accessed, see PR103116. Make sure we peel for
+ gaps if necessary and sufficient and give up if not. */
+ if (loop_vinfo
+ && *memory_access_type == VMAT_CONTIGUOUS
+ && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+ && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ nunits))
+ {
+ unsigned HOST_WIDE_INT cnunits, cvf;
+ if (!can_overrun_p
+ || !nunits.is_constant (&cnunits)
+ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
+ /* Peeling for gaps assumes that a single scalar iteration
+ is enough to make sure the last vector iteration doesn't
+ access excess elements.
+ ??? Enhancements include peeling multiple iterations
+ or using masked loads with a static mask. */
+ || (group_size * cvf) % cnunits + group_size < cnunits)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "peeling for gaps insufficient for "
+ "access\n");
+ return false;
+ }
+ overrun_p = true;
+ }
}
}
else