git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Get the widest vector mode from MOVE_MAX
author H.J. Lu <hjl.tools@gmail.com>
Wed, 18 Jun 2025 21:03:48 +0000 (05:03 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Fri, 20 Jun 2025 07:59:35 +0000 (15:59 +0800)
Since MOVE_MAX defines the maximum number of bytes that an instruction
can move quickly between memory and registers, use it to get the widest
vector mode in vector loop when inlining memcpy and memset.

gcc/

PR target/120708
* config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): Use
MOVE_MAX to get the widest vector mode in vector loop.

gcc/testsuite/

PR target/120708
* gcc.target/i386/memcpy-pr120708-1.c: New test.
* gcc.target/i386/memcpy-pr120708-2.c: Likewise.
* gcc.target/i386/memcpy-pr120708-3.c: Likewise.
* gcc.target/i386/memcpy-pr120708-4.c: Likewise.
* gcc.target/i386/memcpy-pr120708-5.c: Likewise.
* gcc.target/i386/memcpy-pr120708-6.c: Likewise.
* gcc.target/i386/memset-pr120708-1.c: Likewise.
* gcc.target/i386/memset-pr120708-2.c: Likewise.
* gcc.target/i386/memcpy-strategy-1.c: Drop dg-skip-if.  Replace
-march=atom with -mno-avx -msse2 -mtune=generic
-mtune-ctrl=^sse_typeless_stores.
* gcc.target/i386/memcpy-strategy-2.c: Likewise.
* gcc.target/i386/memcpy-vector_loop-1.c: Likewise.
* gcc.target/i386/memcpy-vector_loop-2.c: Likewise.
* gcc.target/i386/memset-vector_loop-1.c: Likewise.
* gcc.target/i386/memset-vector_loop-2.c: Likewise.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
15 files changed:
gcc/config/i386/i386-expand.cc
gcc/testsuite/gcc.target/i386/memcpy-pr120708-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-pr120708-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-pr120708-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-pr120708-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-pr120708-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-pr120708-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memcpy-strategy-1.c
gcc/testsuite/gcc.target/i386/memcpy-strategy-2.c
gcc/testsuite/gcc.target/i386/memcpy-vector_loop-1.c
gcc/testsuite/gcc.target/i386/memcpy-vector_loop-2.c
gcc/testsuite/gcc.target/i386/memset-pr120708-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memset-pr120708-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/memset-vector_loop-1.c
gcc/testsuite/gcc.target/i386/memset-vector_loop-2.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 4946f87a1317f65d6c9e31a2e4c6caa98d0c8194..423fc632003d9e32559d6633cc9046ab225c70bb 100644 (file)
@@ -9351,7 +9351,6 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   bool need_zero_guard = false;
   bool noalign;
   machine_mode move_mode = VOIDmode;
-  machine_mode wider_mode;
   int unroll_factor = 1;
   /* TODO: Once value ranges are available, fill in proper data.  */
   unsigned HOST_WIDE_INT min_size = 0;
@@ -9427,6 +9426,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
 
   unroll_factor = 1;
   move_mode = word_mode;
+  int nunits;
   switch (alg)
     {
     case libcall:
@@ -9447,27 +9447,14 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
     case vector_loop:
       need_zero_guard = true;
       unroll_factor = 4;
-      /* Find the widest supported mode.  */
-      move_mode = word_mode;
-      while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
-            && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
-       move_mode = wider_mode;
-
-      if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
-       move_mode = TImode;
-      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
-       move_mode = OImode;
-
-      /* Find the corresponding vector mode with the same size as MOVE_MODE.
-        MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
-      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
-       {
-         int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
-         if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
-             || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
-           move_mode = word_mode;
-       }
-      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
+      /* Get the vector mode to move MOVE_MAX bytes.  */
+      nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+      if (nunits > 1)
+       {
+         move_mode = mode_for_vector (word_mode, nunits).require ();
+         gcc_assert (optab_handler (mov_optab, move_mode)
+                     != CODE_FOR_nothing);
+       }
       break;
     case rep_prefix_8_byte:
       move_mode = DImode;
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-1.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-1.c
new file mode 100644 (file)
index 0000000..d4fe2ad
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+char a[2048];
+char b[2048];
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-2.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-2.c
new file mode 100644 (file)
index 0000000..9a6fcfd
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+char *a;
+char *b;
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-3.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-3.c
new file mode 100644 (file)
index 0000000..010ac24
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:-1:align" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+char a[2048];
+char b[2048];
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-4.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-4.c
new file mode 100644 (file)
index 0000000..87a58ef
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
+/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+
+char a[2048];
+char b[2048];
+void t (void)
+{
+  __builtin_memcpy (a, b, 2048);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-5.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-5.c
new file mode 100644 (file)
index 0000000..19e0600
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=128 -mmemcpy-strategy=vector_loop:2048:noalign,libcall:-1:noalign" } */
+
+#define SIZE (16 + 1) * 16
+
+char dest[SIZE];
+char src[SIZE];
+
+void
+foo (void)
+{
+  __builtin_memcpy (dest, src, SIZE);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\[^\n\r]*%xmm\[0-9\]\+" 10 } } */
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120708-6.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120708-6.c
new file mode 100644 (file)
index 0000000..17b101f
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=256 -mmemcpy-strategy=vector_loop:2048:noalign,libcall:-1:noalign" } */
+
+#define SIZE (16 + 1) * 32
+
+char dest[SIZE];
+char src[SIZE];
+
+void
+foo (void)
+{
+  __builtin_memcpy (dest, src, SIZE);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\[^\n\r]*%ymm\[0-9\]\+" 10 } } */
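diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-1.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-1.c
index 6ac80c9105330b494ce2d497ad26e1446b3ef263..b29867388928be87c09ec186cd53981cb61d4477 100644 (file)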
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:-1:align" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mmemcpy-strategy=vector_loop:-1:align" } */
 /* { dg-final { scan-assembler-times "movdqa" 8 } } */
 
 char a[2048];
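diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-2.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-2.c
index c103896a1106a1fd646d545261f215bc5544ed49..18e260b0191a4d1b979a84e67a0f0fa2cdae27fd 100644 (file)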
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mmemcpy-strategy=vector_loop:3000:align,libcall:-1:align" } */
 /* { dg-final { scan-assembler-times "movdqa" 8 } } */
 
 char a[2048];
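diff --git a/gcc/testsuite/gcc.target/i386/memcpy-vector_loop-1.c b/gcc/testsuite/gcc.target/i386/memcpy-vector_loop-1.c
index 93f428acc8595fe615e2e415132fd32c165ac061..cec8c90e56557e842eb53746dce85fbb7bad3e64 100644 (file)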
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
 /* { dg-final { scan-assembler-times "movdqa" 8 } } */
 
 char a[2048];
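diff --git a/gcc/testsuite/gcc.target/i386/memcpy-vector_loop-2.c b/gcc/testsuite/gcc.target/i386/memcpy-vector_loop-2.c
index ab235401972f21d546c0ea71b842ece4d3262809..314eb3d5b53e86c9587971d4a7b0855da63e2162 100644 (file)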
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
-/* { dg-final { scan-assembler-times "movdqa" 4} } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-final { scan-assembler-times "movdqa" 4 } } */
 
 char *a;
 char *b;
diff --git a/gcc/testsuite/gcc.target/i386/memset-pr120708-1.c b/gcc/testsuite/gcc.target/i386/memset-pr120708-1.c
new file mode 100644 (file)
index 0000000..fba0588
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=128 -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign" } */
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 254);
+}
+
+/* { dg-final { scan-assembler "vmovdqu\[ \t]\+%xmm\[0-9\]+, \\(\[^\n\r]*\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-pr120708-2.c b/gcc/testsuite/gcc.target/i386/memset-pr120708-2.c
new file mode 100644 (file)
index 0000000..d9a3e7e
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -mprefer-vector-width=256 -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign" } */
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 254);
+}
+
+/* { dg-final { scan-assembler "vmovdqu\[ \t]\+%ymm\[0-9\]+, \\(\[^\n\r]*\\)" } } */
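diff --git a/gcc/testsuite/gcc.target/i386/memset-vector_loop-1.c b/gcc/testsuite/gcc.target/i386/memset-vector_loop-1.c
index d6fdc98190816dfab8aac623e84a2b41b726bf34..5bb30a844eab96c686e1da967d1e365e46fd3107 100644 (file)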
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -minline-all-stringops -mstringop-strategy=vector_loop" } */
 /* { dg-final { scan-assembler-times "movdqa" 4 } } */
 
 char a[2048];
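diff --git a/gcc/testsuite/gcc.target/i386/memset-vector_loop-2.c b/gcc/testsuite/gcc.target/i386/memset-vector_loop-2.c
index bce8be0ffae24dd4afe57ab63a17449e7843b23f..6e31070ee86c66485554179420bd41eb3cf58e35 100644 (file)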
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=atom" } } */
-/* { dg-options "-O2 -march=atom -minline-all-stringops -mstringop-strategy=vector_loop" } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic -mtune-ctrl=^sse_typeless_stores -mstringop-strategy=vector_loop" } */
 /* { dg-final { scan-assembler-times "movdqa" 4} } */
 
 char *a;