git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES
author H.J. Lu <hjl.tools@gmail.com>
Thu, 26 Aug 2021 12:31:50 +0000 (05:31 -0700)
committer liuhongt <hongtao.liu@intel.com>
Mon, 13 Sep 2021 11:55:29 +0000 (19:55 +0800)
1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation
with 256-bit AVX instructions.
2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
operations with 256-bit AVX instructions.

They are enabled only for Intel Alder Lake and Intel processors with
AVX512.

gcc/

PR target/101935
* config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
(TARGET_AVX256_STORE_BY_PIECES): Likewise.
(MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
TARGET_AVX256_STORE_BY_PIECES instead of
TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
TARGET_AVX256_SPLIT_UNALIGNED_STORE.
(STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead
of TARGET_AVX256_SPLIT_UNALIGNED_STORE.
* config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
(X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.

gcc/testsuite/

PR target/101935
* g++.target/i386/pr80566-1.C: Add
-mtune-ctrl=avx256_store_by_pieces.
* gcc.target/i386/pr100865-4a.c: Likewise.
* gcc.target/i386/pr100865-10a.c: Likewise.
* gcc.target/i386/pr90773-20.c: Likewise.
* gcc.target/i386/pr90773-21.c: Likewise.
* gcc.target/i386/pr90773-22.c: Likewise.
* gcc.target/i386/pr90773-23.c: Likewise.
* g++.target/i386/pr80566-2.C: Add
-mtune-ctrl=avx256_move_by_pieces.
* gcc.target/i386/eh_return-1.c: Likewise.
* gcc.target/i386/pr90773-26.c: Likewise.
* gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
with -mtune-ctrl=avx256_move_by_pieces.
* gcc.target/i386/pieces-memcpy-15.c: Likewise.
* gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
with -mtune-ctrl=avx256_store_by_pieces.
* gcc.target/i386/pieces-memset-5.c: Likewise.
* gcc.target/i386/pieces-memset-11.c: Likewise.
* gcc.target/i386/pieces-memset-14.c: Likewise.
* gcc.target/i386/pieces-memset-20.c: Likewise.
* gcc.target/i386/pieces-memset-23.c: Likewise.
* gcc.target/i386/pieces-memset-29.c: Likewise.
* gcc.target/i386/pieces-memset-30.c: Likewise.
* gcc.target/i386/pieces-memset-33.c: Likewise.
* gcc.target/i386/pieces-memset-34.c: Likewise.
* gcc.target/i386/pieces-memset-44.c: Likewise.
* gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
with -mtune-ctrl=avx256_store_by_pieces.

26 files changed:
gcc/config/i386/i386.h
gcc/config/i386/x86-tune.def
gcc/testsuite/g++.target/i386/pr80566-1.C
gcc/testsuite/g++.target/i386/pr80566-2.C
gcc/testsuite/gcc.target/i386/eh_return-1.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
gcc/testsuite/gcc.target/i386/pieces-memset-11.c
gcc/testsuite/gcc.target/i386/pieces-memset-14.c
gcc/testsuite/gcc.target/i386/pieces-memset-2.c
gcc/testsuite/gcc.target/i386/pieces-memset-20.c
gcc/testsuite/gcc.target/i386/pieces-memset-23.c
gcc/testsuite/gcc.target/i386/pieces-memset-29.c
gcc/testsuite/gcc.target/i386/pieces-memset-30.c
gcc/testsuite/gcc.target/i386/pieces-memset-33.c
gcc/testsuite/gcc.target/i386/pieces-memset-34.c
gcc/testsuite/gcc.target/i386/pieces-memset-37.c
gcc/testsuite/gcc.target/i386/pieces-memset-44.c
gcc/testsuite/gcc.target/i386/pieces-memset-5.c
gcc/testsuite/gcc.target/i386/pr100865-10a.c
gcc/testsuite/gcc.target/i386/pr100865-4a.c
gcc/testsuite/gcc.target/i386/pr90773-20.c
gcc/testsuite/gcc.target/i386/pr90773-21.c
gcc/testsuite/gcc.target/i386/pr90773-22.c
gcc/testsuite/gcc.target/i386/pr90773-23.c
gcc/testsuite/gcc.target/i386/pr90773-26.c

index 73237b81cf814382f65cd8fa3f5721522bf6a832..e76bb55c080d078e2a5334597a106b4331698e92 100644 (file)
@@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
 #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
        ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
+#define TARGET_AVX256_MOVE_BY_PIECES \
+       ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
+#define TARGET_AVX256_STORE_BY_PIECES \
+       ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
 #define TARGET_AVX256_SPLIT_REGS \
        ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -1793,8 +1797,8 @@ typedef struct ix86_args {
    ? 64 \
    : ((TARGET_AVX \
        && !TARGET_PREFER_AVX128 \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+       && (TARGET_AVX256_MOVE_BY_PIECES \
+          || TARGET_AVX256_STORE_BY_PIECES)) \
       ? 32 \
       : ((TARGET_SSE2 \
          && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@@ -1811,7 +1815,7 @@ typedef struct ix86_args {
       ? 64 \
       : ((TARGET_AVX \
          && !TARGET_PREFER_AVX128 \
-         && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+         && TARGET_AVX256_STORE_BY_PIECES) \
          ? 32 \
          : ((TARGET_SSE2 \
              && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
index 8f55da89c920d8bc5cd99b273afdddae26eb82f2..2f221b1f8c9b8674dc17e67a91747e55fe59118e 100644 (file)
@@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
    instructions in the auto-vectorizer.  */
 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
 
+/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
+         m_ALDERLAKE | m_CORE_AVX512)
+
+/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
+         m_ALDERLAKE | m_CORE_AVX512)
+
+/*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
 /*****************************************************************************/
index 753f974052996b7795782bf93ede68db881f31e5..29da31d6bb62381db3196ea2f3ea228da04109ab 100644 (file)
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" }
 
 #include <cstring>
 
index 2a2e82d0a3a3245e5d22d4afa39e021eaa4fb7eb..9ffd2c8cadb83e0b32df83779516c4acaabd8d2e 100644 (file)
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" }
 
 #include <cstring>
 
index 671ba635e88ca4c4b418e8c828077d144e22f8c0..b21fd75fc93d96bba6d9c4df4c4667ae22b1ab8e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
+/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
 
 struct _Unwind_Context
 {
index f1432ebe517860941c2caccb07a9baf697747383..8a82baff5f1ef913abba6200c192d4709454066a 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
 
 extern char *dst, *src;
 
index 695e8c3fa673cfe385e8002208d56198b80d4da5..4fb94ce7bd57e8618f387ae3c8b0856d5aa4b41e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
 
 extern char *dst, *src;
 
index 3fb9038b04ff3773496387eb09cf2a446ce3d2c8..3802eb7c14727199b70381e87c3ed5167c5ad94f 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 45ece482464a1065aa503bc2e672e8f35f3059ad..10bc085f83b39e5b88a096d1c0ac995192cdd6d1 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 649f344e8f639789a2e4a39c4c3f810f684ed4fc..4ebfc4df090ad4aea92f5f233818d92b9fb813a1 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index b8747e669e85460e898c6358e6dcdeaf7be61ec4..1dc4db180d307698122b07456cdb5e7c331a7629 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index a3b4ffc18e00deef6eefaedc8c231178913b557a..9232864024e5c5dc3d421f1d00c43d1cf68cc020 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 650e6fe66a56185beecdcdf1b0f102f753250866..3b07a64e3f632353d94cfb8770d6abe734c94d99 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index dcec2c700fcbe252fe4dffe967c821334f7ad3cd..59595e6d3c4069b88c8391ddec628f8c114f5375 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index a87d1b80ae67594970195f7c040520656aa3a929..68646223b0edd1623fa1b75167f97df8e2a37b57 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 0c2f1ee6049f63f8337d77c7cf7ef3d47955b088..52a16a0292d1c7b4f446cb06031647a7e95731be 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index ec59497b11614fb5582eafcf3375c25eb116cffe..fd09bd153ce38f457c58e12360bbc28649b8847e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 void
 foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
index ecc31be1a34748bf04933e105310cd76271e566e..5986f8e8b2392ba83165cd53dbb589a462f86d19 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 3e95db5efef82731f1505bcd6d1cf896266c6725..e2379df71aafbc46a52ee99dfb20eee06d617068 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 98b6dfb16f330b7b4300f50146e082f39c380d80..1d849a381c05de8df0aee2bd601dd8b975ad2c00 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target int128 } } */
-/* { dg-options "-O3 -march=skylake" } */
+/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern __int128 array[16];
 
index 365487337aee241469f9be10d32f550cf068b455..8609d1128b8dd6178253e576dfe18f7e198bf76b 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char array[64];
 
index e61e405f2b6b7d1d2a43ff177535cc129790f58c..884a5502b5925653b1dfc963fc313f7cde13ad5e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 16ad17f3cbbdba0d25fbc6831a5b76bff96d60bb..5bbb387a3ea692d2a39fde843c3a61180753011f 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 45a8ff65a849c833fd663ed05269ef850958b551..245a436b7eb1e518510e99882b647a0627b28b9d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index 9256ce10ff0fdf0d0784b65fd707faed1d43b40e..ca4a86f30b854adf69a92294a0f50e645a351e31 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
index b2513c3a9c8d810d20bb567b5aeb1faa8ac20a2c..76fb79f2e2031f182846c6b9622ff8251a364286 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */
 
 struct S
 {