From: H.J. Lu Date: Thu, 26 Aug 2021 12:31:50 +0000 (-0700) Subject: x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES X-Git-Tag: basepoints/gcc-13~4828 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5b01bfeb8703c264ad402b77741f06f41d7fceac;p=thirdparty%2Fgcc.git x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES 1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation with 256-bit AVX instructions. 2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces operations with 256-bit AVX instructions. They are enabled only for Intel Alder Lake and Intel processors with AVX512. gcc/ PR target/101935 * config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New. (TARGET_AVX256_STORE_BY_PIECES): Likewise. (MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and TARGET_AVX256_STORE_BY_PIECES instead of TARGET_AVX256_SPLIT_UNALIGNED_LOAD and TARGET_AVX256_SPLIT_UNALIGNED_STORE. (STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead of TARGET_AVX256_SPLIT_UNALIGNED_STORE. * config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New. (X86_TUNE_AVX256_STORE_BY_PIECES): Likewise. gcc/testsuite/ PR target/101935 * g++.target/i386/pr80566-1.C: Add -mtune-ctrl=avx256_store_by_pieces. * gcc.target/i386/pr100865-4a.c: Likewise. * gcc.target/i386/pr100865-10a.c: Likewise. * gcc.target/i386/pr90773-20.c: Likewise. * gcc.target/i386/pr90773-21.c: Likewise. * gcc.target/i386/pr90773-22.c: Likewise. * gcc.target/i386/pr90773-23.c: Likewise. * g++.target/i386/pr80566-2.C: Add -mtune-ctrl=avx256_move_by_pieces. * gcc.target/i386/eh_return-1.c: Likewise. * gcc.target/i386/pr90773-26.c: Likewise. * gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell with -mtune-ctrl=avx256_move_by_pieces. * gcc.target/i386/pieces-memcpy-15.c: Likewise. * gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell with -mtune-ctrl=avx256_store_by_pieces. * gcc.target/i386/pieces-memset-5.c: Likewise. * gcc.target/i386/pieces-memset-11.c: Likewise. * gcc.target/i386/pieces-memset-14.c: Likewise. * gcc.target/i386/pieces-memset-20.c: Likewise. * gcc.target/i386/pieces-memset-23.c: Likewise. * gcc.target/i386/pieces-memset-29.c: Likewise. * gcc.target/i386/pieces-memset-30.c: Likewise. * gcc.target/i386/pieces-memset-33.c: Likewise. * gcc.target/i386/pieces-memset-34.c: Likewise. * gcc.target/i386/pieces-memset-44.c: Likewise. * gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic with -mtune-ctrl=avx256_store_by_pieces. --- diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 73237b81cf81..e76bb55c080d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR] #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \ ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL] +#define TARGET_AVX256_MOVE_BY_PIECES \ + ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES] +#define TARGET_AVX256_STORE_BY_PIECES \ + ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES] #define TARGET_AVX256_SPLIT_REGS \ ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS] #define TARGET_GENERAL_REGS_SSE_SPILL \ @@ -1793,8 +1797,8 @@ typedef struct ix86_args { ? 64 \ : ((TARGET_AVX \ && !TARGET_PREFER_AVX128 \ - && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \ - && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \ + && (TARGET_AVX256_MOVE_BY_PIECES \ + || TARGET_AVX256_STORE_BY_PIECES)) \ ? 32 \ : ((TARGET_SSE2 \ && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \ @@ -1811,7 +1815,7 @@ typedef struct ix86_args { ? 64 \ : ((TARGET_AVX \ && !TARGET_PREFER_AVX128 \ - && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \ + && TARGET_AVX256_STORE_BY_PIECES) \ ? 32 \ : ((TARGET_SSE2 \ && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 8f55da89c920..2f221b1f8c9b 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 instructions in the auto-vectorizer. */ DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) +/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit + AVX instructions. */ +DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces", + m_ALDERLAKE | m_CORE_AVX512) + +/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit + AVX instructions. */ +DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", + m_ALDERLAKE | m_CORE_AVX512) + +/*****************************************************************************/ /*****************************************************************************/ /* Historical relics: tuning flags that helps a specific old CPU designs */ /*****************************************************************************/ diff --git a/gcc/testsuite/g++.target/i386/pr80566-1.C b/gcc/testsuite/g++.target/i386/pr80566-1.C index 753f97405299..29da31d6bb62 100644 --- a/gcc/testsuite/g++.target/i386/pr80566-1.C +++ b/gcc/testsuite/g++.target/i386/pr80566-1.C @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options "-O2 -march=haswell" } +// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" } #include diff --git a/gcc/testsuite/g++.target/i386/pr80566-2.C b/gcc/testsuite/g++.target/i386/pr80566-2.C index 2a2e82d0a3a3..9ffd2c8cadb8 100644 --- a/gcc/testsuite/g++.target/i386/pr80566-2.C +++ b/gcc/testsuite/g++.target/i386/pr80566-2.C @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options "-O2 -march=haswell" } +// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" } #include diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c index 671ba635e88c..b21fd75fc93d 100644 --- a/gcc/testsuite/gcc.target/i386/eh_return-1.c +++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=haswell -mno-avx512f" } */ +/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */ struct _Unwind_Context { diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c index f1432ebe5178..8a82baff5f1e 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c index 695e8c3fa673..4fb94ce7bd57 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c index 3fb9038b04ff..3802eb7c1472 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c index 45ece482464a..10bc085f83b3 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c index 649f344e8f63..4ebfc4df090a 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c index b8747e669e85..1dc4db180d30 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c index a3b4ffc18e00..9232864024e5 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c index 650e6fe66a56..3b07a64e3f63 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c index dcec2c700fcb..59595e6d3c40 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c index a87d1b80ae67..68646223b0ed 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c index 0c2f1ee6049f..52a16a0292d1 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c index ec59497b1161..fd09bd153ce3 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */ void foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst) diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c index ecc31be1a347..5986f8e8b239 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c index 3e95db5efef8..e2379df71aaf 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c index 98b6dfb16f33..1d849a381c05 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c @@ -1,5 +1,5 @@ /* { dg-do compile { target int128 } } */ -/* { dg-options "-O3 -march=skylake" } */ +/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern __int128 array[16]; diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c index 365487337aee..8609d1128b8d 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern char array[64]; diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c index e61e405f2b6b..884a5502b592 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-20.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c index 16ad17f3cbbd..5bbb387a3ea6 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c index 45a8ff65a849..245a436b7eb1 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-22.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c index 9256ce10ff0f..ca4a86f30b85 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c index b2513c3a9c8d..76fb79f2e203 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-26.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=skylake" } */ +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */ struct S {