git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Align ix86_{move_max,store_max} with vectorizer.
author liuhongt <hongtao.liu@intel.com>
Thu, 15 Aug 2024 04:54:07 +0000 (12:54 +0800)
committer liuhongt <hongtao.liu@intel.com>
Thu, 22 Aug 2024 02:36:04 +0000 (10:36 +0800)
When none of mprefer-vector-width, avx256_optimal/avx128_optimal, or
avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC sets
ix86_{move_max,store_max} to the maximum available vector length,
except in the AVX case, where it falls back to 128 bits:

      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
opts->x_ix86_move_max = PVW_AVX512;
      else
opts->x_ix86_move_max = PVW_AVX128;

So for -mavx2, the vectorizer chooses 256-bit vectors for vectorization,
but struct copies use 128-bit moves. This width mismatch between the two
can cause a potential store-to-load forwarding (STLF) issue.

This patch fixes that by using PVW_AVX256 for both ix86_move_max and
ix86_store_max when TARGET_AVX is enabled.

gcc/ChangeLog:

* config/i386/i386-options.cc (ix86_option_override_internal):
Set ix86_{move_max,store_max} to PVW_AVX256 instead of PVW_AVX128
when TARGET_AVX is enabled.

gcc/testsuite/ChangeLog:
* gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
* gcc.target/i386/pieces-memcpy-6.c: Ditto.
* gcc.target/i386/pieces-memset-38.c: Ditto.
* gcc.target/i386/pieces-memset-40.c: Ditto.
* gcc.target/i386/pieces-memset-41.c: Ditto.
* gcc.target/i386/pieces-memset-42.c: Ditto.
* gcc.target/i386/pieces-memset-43.c: Ditto.
* gcc.target/i386/pieces-strcpy-2.c: Ditto.
* gcc.target/i386/pieces-memcpy-22.c: New test.
* gcc.target/i386/pieces-memset-51.c: New test.
* gcc.target/i386/pieces-strcpy-3.c: New test.

(cherry picked from commit aea374238cec1a1e53fb79575d2f998e16926999)

12 files changed:
gcc/config/i386/i386-options.cc
gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
gcc/testsuite/gcc.target/i386/pieces-memset-38.c
gcc/testsuite/gcc.target/i386/pieces-memset-40.c
gcc/testsuite/gcc.target/i386/pieces-memset-41.c
gcc/testsuite/gcc.target/i386/pieces-memset-42.c
gcc/testsuite/gcc.target/i386/pieces-memset-43.c
gcc/testsuite/gcc.target/i386/pieces-memset-51.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c [new file with mode: 0644]

index 318f6c6145513bdfdb279020e77c1ad32009e324..ad496ea5a8ebe3c3886f809dba79ec05e9e3e6ea 100644 (file)
@@ -2766,6 +2766,9 @@ ix86_option_override_internal (bool main_args_p,
            {
              if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
                opts->x_ix86_move_max = PVW_AVX512;
+             /* Align with vectorizer to avoid potential STLF issue.  */
+             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_move_max = PVW_AVX256;
              else
                opts->x_ix86_move_max = PVW_AVX128;
            }
@@ -2787,6 +2790,9 @@ ix86_option_override_internal (bool main_args_p,
            {
              if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
                opts->x_ix86_store_max = PVW_AVX512;
+             /* Align with vectorizer to avoid potential STLF issue.  */
+             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_store_max = PVW_AVX256;
              else
                opts->x_ix86_store_max = PVW_AVX128;
            }
index 5faee21f9b9931c8bf8294bcd5d551815c4dab45..53ad0b3be4436e03861405ba2ab0661a9c765157 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
new file mode 100644 (file)
index 0000000..605b362
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
index 5f99cc98c4720c902ad4ce93bca5372bb98438b4..cfd2a86cf33bd2648062c7616d367e6e7a87164f 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
index ed4a24a54fda5b767ad54a92f490596b1f773d5c..ddd194debd57d5c48030c9e21909930d3d79efbe 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
index 4eda73ead5924786994f4edd77b5a8f5ff7db7e0..9c206465d465625a2819c42895377b3f116f521a 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
index 93df8101e4d0fe5654555d8fac7a1ea4344f1c80..b0756182e355dbfb2224320033e9a56df875be52 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
 
 extern char *dst;
 
index df0c122aae71d4d010064719a632e5b42895b9d0..103da699ae5273ee077ff9173792ad0c4b91db2c 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
index 2f2179c2df9e3797b5f6b7276cd00cb4a4c8be92..f1494e176105b43c8d6c4cf814bf9ea594768845 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
new file mode 100644 (file)
index 0000000..192ec0d
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
index 90446edb4f35086353359af1801fafe73a0ab74d..9bb94b7419b3677eb77e29bfddf7bed947271215 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *strcpy (char *, const char *);
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
new file mode 100644 (file)
index 0000000..df7571b
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+         "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */