From: Sebastian Peryt Date: Mon, 4 Dec 2017 11:40:44 +0000 (+0100) Subject: Fix PR82941 and PR82942 by adding proper vzeroupper generation on SKX. X-Git-Tag: releases/gcc-6.5.0~652 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a530ac2e871f357cb02ca7cdfa83a9d3e854abb0;p=thirdparty%2Fgcc.git Fix PR82941 and PR82942 by adding proper vzeroupper generation on SKX. Add X86_TUNE_EMIT_VZEROUPPER to indicate if vzeroupper instruction should be inserted before a transfer of control flow out of the function. It is turned on by default unless we are tuning for KNL. Users can always use -mzeroupper or -mno-zeroupper to override X86_TUNE_EMIT_VZEROUPPER. 2017-12-04 Sebastian Peryt H.J. Lu gcc/ Bakcported from trunk PR target/82941 PR target/82942 PR target/82990 * config/i386/i386.c (pass_insert_vzeroupper): Remove TARGET_AVX512F check from gate condition. (ix86_check_avx256_register): Changed to ... (ix86_check_avx_upper_register): ... this. Add extra check for VALID_AVX512F_REG_OR_XI_MODE. (ix86_avx_u128_mode_needed): Changed ix86_check_avx256_register to ix86_check_avx_upper_register. (ix86_check_avx256_stores): Changed to ... (ix86_check_avx_upper_stores): ... this. Changed ix86_check_avx256_register to ix86_check_avx_upper_register. (ix86_avx_u128_mode_after): Changed avx_reg256_found to avx_upper_reg_found. Changed ix86_check_avx256_stores to ix86_check_avx_upper_stores. (ix86_avx_u128_mode_entry): Changed ix86_check_avx256_register to ix86_check_avx_upper_register. (ix86_avx_u128_mode_exit): Ditto. (ix86_option_override_internal): Set MASK_VZEROUPPER if neither -mzeroupper nor -mno-zeroupper is used and TARGET_EMIT_VZEROUPPER is set. * config/i386/i386.h: (host_detect_local_cpu): New define. (TARGET_EMIT_VZEROUPPER): New. * config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER. gcc/testsuite/ Backported from trunk PR target/82941 PR target/82942 PR target/82990 * gcc.target/i386/pr82941-1.c: New test. * gcc.target/i386/pr82941-2.c: Likewise. * gcc.target/i386/pr82942-1.c: Likewise. * gcc.target/i386/pr82942-2.c: Likewise. * gcc.target/i386/pr82990-1.c: Likewise. * gcc.target/i386/pr82990-2.c: Likewise. * gcc.target/i386/pr82990-3.c: Likewise. * gcc.target/i386/pr82990-4.c: Likewise. * gcc.target/i386/pr82990-5.c: Likewise. * gcc.target/i386/pr82990-6.c: Likewise. * gcc.target/i386/pr82990-7.c: Likewise. Co-Authored-By: H.J. Lu From-SVN: r255379 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a5987949442e..e586870d1281 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,33 @@ +2017-12-04 Sebastian Peryt + H.J. Lu + + Bakcported from trunk + PR target/82941 + PR target/82942 + PR target/82990 + * config/i386/i386.c (pass_insert_vzeroupper): Remove + TARGET_AVX512F check from gate condition. + (ix86_check_avx256_register): Changed to ... + (ix86_check_avx_upper_register): ... this. Add extra check for + VALID_AVX512F_REG_OR_XI_MODE. + (ix86_avx_u128_mode_needed): Changed + ix86_check_avx256_register to ix86_check_avx_upper_register. + (ix86_check_avx256_stores): Changed to ... + (ix86_check_avx_upper_stores): ... this. Changed + ix86_check_avx256_register to ix86_check_avx_upper_register. + (ix86_avx_u128_mode_after): Changed + avx_reg256_found to avx_upper_reg_found. Changed + ix86_check_avx256_stores to ix86_check_avx_upper_stores. + (ix86_avx_u128_mode_entry): Changed + ix86_check_avx256_register to ix86_check_avx_upper_register. + (ix86_avx_u128_mode_exit): Ditto. + (ix86_option_override_internal): Set MASK_VZEROUPPER if + neither -mzeroupper nor -mno-zeroupper is used and + TARGET_EMIT_VZEROUPPER is set. + * config/i386/i386.h: (host_detect_local_cpu): New define. + (TARGET_EMIT_VZEROUPPER): New. + * config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER + 2017-12-01 Segher Boessenkool Backport from mainline diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7449f39729cd..8b5faac51296 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2675,7 +2675,7 @@ rest_of_handle_insert_vzeroupper (void) int i; /* vzeroupper instructions are inserted immediately after reload to - account for possible spills from 256bit registers. The pass + account for possible spills from 256bit or 512bit registers. The pass reuses mode switching infrastructure by re-running mode insertion pass, so disable entities that have already been processed. */ for (i = 0; i < MAX_386_ENTITIES; i++) @@ -3650,7 +3650,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && !TARGET_AVX512F + return TARGET_AVX && TARGET_VZEROUPPER && flag_expensive_optimizations && !optimize_size; } @@ -5470,7 +5470,8 @@ ix86_option_override_internal (bool main_args_p, #endif } - if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) + && TARGET_EMIT_VZEROUPPER) opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & MASK_STV)) opts->x_target_flags |= MASK_STV; @@ -18025,16 +18026,17 @@ output_387_binary_op (rtx insn, rtx *operands) return buf; } -/* Check if a 256bit AVX register is referenced inside of EXP. */ +/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ static bool -ix86_check_avx256_register (const_rtx exp) +ix86_check_avx_upper_register (const_rtx exp) { if (SUBREG_P (exp)) exp = SUBREG_REG (exp); return (REG_P (exp) - && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))); + && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)) + || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp)))); } /* Return needed mode for entity in optimize_mode_switching pass. */ @@ -18047,7 +18049,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) rtx link; /* Needed mode is set to AVX_U128_CLEAN if there are - no 256bit modes used in function arguments. */ + no 256bit or 512bit modes used in function arguments. */ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) @@ -18056,7 +18058,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) { rtx arg = XEXP (XEXP (link, 0), 0); - if (ix86_check_avx256_register (arg)) + if (ix86_check_avx_upper_register (arg)) return AVX_U128_DIRTY; } } @@ -18064,13 +18066,13 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } - /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware - changes state only when a 256bit register is written to, but we need - to prevent the compiler from moving optimal insertion point above - eventual read from 256bit register. */ + /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. + Hardware changes state only when a 256bit register is written to, + but we need to prevent the compiler from moving optimal insertion + point above eventual read from 256bit or 512 bit register. */ subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (ix86_check_avx256_register (*iter)) + if (ix86_check_avx_upper_register (*iter)) return AVX_U128_DIRTY; return AVX_U128_ANY; @@ -18150,12 +18152,12 @@ ix86_mode_needed (int entity, rtx_insn *insn) return 0; } -/* Check if a 256bit AVX register is referenced in stores. */ +/* Check if a 256bit or 512bit AVX register is referenced in stores. */ static void -ix86_check_avx256_stores (rtx dest, const_rtx, void *data) +ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx256_register (dest)) + if (ix86_check_avx_upper_register (dest)) { bool *used = (bool *) data; *used = true; @@ -18174,18 +18176,18 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) return AVX_U128_CLEAN; /* We know that state is clean after CALL insn if there are no - 256bit registers used in the function return register. */ + 256bit or 512bit registers used in the function return register. */ if (CALL_P (insn)) { - bool avx_reg256_found = false; - note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found); + bool avx_upper_reg_found = false; + note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn - references AVX 256bit registers, the mode was already changed - to DIRTY from MODE_NEEDED. */ + references AVX 256bit or 512bit registers, the mode was already + changed to DIRTY from MODE_NEEDED. */ return mode; } @@ -18214,13 +18216,13 @@ ix86_avx_u128_mode_entry (void) tree arg; /* Entry mode is set to AVX_U128_DIRTY if there are - 256bit modes used in function arguments. */ + 256bit or 512bit modes used in function arguments. */ for (arg = DECL_ARGUMENTS (current_function_decl); arg; arg = TREE_CHAIN (arg)) { rtx incoming = DECL_INCOMING_RTL (arg); - if (incoming && ix86_check_avx256_register (incoming)) + if (incoming && ix86_check_avx_upper_register (incoming)) return AVX_U128_DIRTY; } @@ -18252,9 +18254,9 @@ ix86_avx_u128_mode_exit (void) { rtx reg = crtl->return_rtx; - /* Exit mode is set to AVX_U128_DIRTY if there are - 256bit modes used in the function return register. */ - if (reg && ix86_check_avx256_register (reg)) + /* Exit mode is set to AVX_U128_DIRTY if there are 256bit + or 512 bit modes used in the function return register. */ + if (reg && ix86_check_avx_upper_register (reg)) return AVX_U128_DIRTY; return AVX_U128_CLEAN; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 365ec4376edb..8113f83c7fd2 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -501,6 +501,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] +#define TARGET_EMIT_VZEROUPPER \ + ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -1124,6 +1126,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \ || (MODE) == V4TImode) +#define VALID_AVX512F_REG_OR_XI_MODE(MODE) \ + (VALID_AVX512F_REG_MODE (MODE) || (MODE) == XImode) + #define VALID_AVX512VL_128_REG_MODE(MODE) \ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 31a87b913b26..9d00abbcac3f 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -555,3 +555,7 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) if-converted to one. */ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC) + +/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion + before a transfer of control flow out of the function. */ +DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f9398699636e..6a1b459f2b5a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2017-12-04 Sebastian Peryt + H.J. Lu + + Backported from trunk + PR target/82941 + PR target/82942 + PR target/82990 + * gcc.target/i386/pr82941-1.c: New test. + * gcc.target/i386/pr82941-2.c: Likewise. + * gcc.target/i386/pr82942-1.c: Likewise. + * gcc.target/i386/pr82942-2.c: Likewise. + * gcc.target/i386/pr82990-1.c: Likewise. + * gcc.target/i386/pr82990-2.c: Likewise. + * gcc.target/i386/pr82990-3.c: Likewise. + * gcc.target/i386/pr82990-4.c: Likewise. + * gcc.target/i386/pr82990-5.c: Likewise. + * gcc.target/i386/pr82990-6.c: Likewise. + * gcc.target/i386/pr82990-7.c: Likewise. + 2017-11-21 Rainer Orth Backport from mainline diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c new file mode 100644 index 000000000000..d7e530d51161 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +#include + +extern __m512d y, z; + +void +pr82941 () +{ + z = y; +} + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82941-2.c b/gcc/testsuite/gcc.target/i386/pr82941-2.c new file mode 100644 index 000000000000..db2f8589ab69 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82941-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=knl" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c new file mode 100644 index 000000000000..9cdf81a9d603 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512er -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c new file mode 100644 index 000000000000..ddb4e689659b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c new file mode 100644 index 000000000000..ff1d6d40eb26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=knl -mvzeroupper" } */ + +#include + +extern __m512d y, z; + +void +pr82941 () +{ + z = y; +} + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c new file mode 100644 index 000000000000..0d3cb2333ddf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c new file mode 100644 index 000000000000..201fa98d8d41 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c new file mode 100644 index 000000000000..09f161c7291d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c new file mode 100644 index 000000000000..9932bdc53756 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mtune=generic" } */ + +#include + +extern __m512d y, z; + +void +pr82941 () +{ + z = y; +} + +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c new file mode 100644 index 000000000000..063a61c111d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c new file mode 100644 index 000000000000..dedde8b854b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82990-7.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */ + +#include "pr82941-1.c" + +/* { dg-final { scan-assembler-not "vzeroupper" } } */