bool need_zero_guard = false;
bool noalign;
machine_mode move_mode = VOIDmode;
- machine_mode wider_mode;
int unroll_factor = 1;
/* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unroll_factor = 1;
move_mode = word_mode;
+ int nunits;
switch (alg)
{
case libcall:
case vector_loop:
need_zero_guard = true;
unroll_factor = 4;
- /* Find the widest supported mode. */
- move_mode = word_mode;
- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
- move_mode = wider_mode;
-
- if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
- move_mode = TImode;
- if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
- move_mode = OImode;
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
- {
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
- move_mode = word_mode;
- }
- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
+ /* Get the vector mode to move MOVE_MAX bytes. */
+ nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+ if (nunits > 1)
+ {
+ move_mode = mode_for_vector (word_mode, nunits).require ();
+ gcc_assert (optab_handler (mov_optab, move_mode)
+ != CODE_FOR_nothing);
+ }
break;
case rep_prefix_8_byte:
move_mode = DImode;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+/* Compile-only test: copy 2048 bytes between two global arrays with
+   -minline-all-stringops and -mstringop-strategy=vector_loop under
+   -march=atom.  The dg-final directive above requires that "movdqa"
+   does not appear anywhere in the generated assembly.  */
+char a[2048];
+char b[2048];
+void t (void)
+{
+ __builtin_memcpy (a, b, 2048);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+/* Compile-only test: copy 2048 bytes through two global pointers (rather
+   than arrays of known size) with -minline-all-stringops and
+   -mstringop-strategy=vector_loop under -march=atom.  The dg-final
+   directive above requires that "movdqa" does not appear in the
+   generated assembly.  */
+char *a;
+char *b;
+void t (void)
+{
+ __builtin_memcpy (a, b, 2048);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:-1:align" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+/* Compile-only test: copy 2048 bytes between two global arrays under
+   -march=atom with -mmemcpy-strategy=vector_loop:-1:align, i.e. a single
+   alg:max_size:dest_align triplet naming vector_loop with max_size -1.
+   The dg-final directive above requires that "movdqa" does not appear
+   in the generated assembly.  */
+char a[2048];
+char b[2048];
+void t (void)
+{
+ __builtin_memcpy (a, b, 2048);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+/* Compile-only test: copy 2048 bytes between two global arrays under
+   -march=atom with a two-entry -mmemcpy-strategy list: vector_loop with
+   max_size 3000, then libcall with max_size -1.  The copied size (2048)
+   is below the 3000 limit of the first entry.  The dg-final directive
+   above requires that "movdqa" does not appear in the generated
+   assembly.  */
+char a[2048];
+char b[2048];
+void t (void)
+{
+ __builtin_memcpy (a, b, 2048);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=128 -mmemcpy-strategy=vector_loop:2048:noalign,libcall:-1:noalign" } */
+
+/* Size of both buffers in bytes: (16 + 1) * 16 = 272.  The replacement
+   list is fully parenthesized so that SIZE expands to a single operand
+   regardless of the expression it is used in.  */
+#define SIZE ((16 + 1) * 16)
+
+char dest[SIZE];
+char src[SIZE];
+
+/* Copy the whole of SRC into DEST.  Compiled with
+   -mprefer-vector-width=128 and a memcpy strategy of vector_loop for
+   sizes up to 2048; the dg-final directive below checks the emitted
+   instructions.  */
+void
+foo (void)
+{
+ __builtin_memcpy (dest, src, SIZE);
+}
+
+/* Expect exactly 10 "vmovdqa" instructions that mention an %xmm register.  */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\[^\n\r]*%xmm\[0-9\]\+" 10 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=256 -mmemcpy-strategy=vector_loop:2048:noalign,libcall:-1:noalign" } */
+
+/* Size of both buffers in bytes: (16 + 1) * 32 = 544.  The replacement
+   list is fully parenthesized so that SIZE expands to a single operand
+   regardless of the expression it is used in.  */
+#define SIZE ((16 + 1) * 32)
+
+char dest[SIZE];
+char src[SIZE];
+
+/* Copy the whole of SRC into DEST.  Compiled with
+   -mprefer-vector-width=256 and a memcpy strategy of vector_loop for
+   sizes up to 2048; the dg-final directive below checks the emitted
+   instructions.  */
+void
+foo (void)
+{
+ __builtin_memcpy (dest, src, SIZE);
+}
+
+/* Expect exactly 10 "vmovdqa" instructions that mention a %ymm register.  */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\[^\n\r]*%ymm\[0-9\]\+" 10 } } */
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:-1:align" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mmemcpy-strategy=vector_loop:-1:align" } */
/* { dg-final { scan-assembler-times "movdqa" 8 } } */
char a[2048];
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
/* { dg-final { scan-assembler-times "movdqa" 8 } } */
char a[2048];
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
/* { dg-final { scan-assembler-times "movdqa" 8 } } */
char a[2048];
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
-/* { dg-final { scan-assembler-times "movdqa" 4} } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movdqa" 4 } } */
char *a;
char *b;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=128 -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign" } */
+
+/* Zero 254 bytes at DEST.  Compiled with -mprefer-vector-width=128 and a
+   memset strategy of vector_loop for sizes up to 256 (libcall beyond
+   that); the dg-final directive below checks the emitted store.  */
+void
+foo (char *dest)
+{
+ __builtin_memset (dest, 0, 254);
+}
+
+/* Expect at least one "vmovdqu" whose source is an %xmm register and whose
+   destination is a parenthesized memory operand.  */
+/* { dg-final { scan-assembler "vmovdqu\[ \t]\+%xmm\[0-9\]+, \\(\[^\n\r]*\\)" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=256 -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign" } */
+
+/* Zero 254 bytes at DEST.  Compiled with -mprefer-vector-width=256 and a
+   memset strategy of vector_loop for sizes up to 256 (libcall beyond
+   that); the dg-final directive below checks the emitted store.  */
+void
+foo (char *dest)
+{
+ __builtin_memset (dest, 0, 254);
+}
+
+/* Expect at least one "vmovdqu" whose source is a %ymm register and whose
+   destination is a parenthesized memory operand.  */
+/* { dg-final { scan-assembler "vmovdqu\[ \t]\+%ymm\[0-9\]+, \\(\[^\n\r]*\\)" } } */
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
/* { dg-final { scan-assembler-times "movdqa" 4 } } */
char a[2048];
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mstringop-strategy=vector_loop" } */
/* { dg-final { scan-assembler-times "movdqa" 4} } */
char *a;