For more details of GDS (Gather Data Sampling), refer to
https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html
After microcode update, there's performance regression. To avoid that,
the patch disables gather generation in autovectorization but uses
gather scalar emulation instead.
gcc/ChangeLog:
* config/i386/i386-options.c (m_GDS): New macro.
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Don't enable
for m_GDS.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx2-gather-2.c: Adjust options to keep
gather vectorization.
* gcc.target/i386/avx2-gather-6.c: Ditto.
* gcc.target/i386/avx512f-pr88464-1.c: Ditto.
* gcc.target/i386/avx512f-pr88464-5.c: Ditto.
* gcc.target/i386/avx512vl-pr88464-1.c: Ditto.
* gcc.target/i386/avx512vl-pr88464-11.c: Ditto.
* gcc.target/i386/avx512vl-pr88464-3.c: Ditto.
* gcc.target/i386/avx512vl-pr88464-9.c: Ditto.
(cherry picked from commit
3064d1f5c48cb6ce1b4133570dd08ecca8abb52d)
#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
#define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
#define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
+/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828.
+ Software mitigation. */
+#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
/* X86_TUNE_USE_GATHER: Use gather instructions. */
DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
- ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
+ ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC | m_GDS))
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
smaller FMA chain. */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details -mtune=skylake" } */
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */
#include "avx2-gather-1.c"
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */
+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */
#include "avx2-gather-5.c"
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* PR tree-optimization/88464 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */