From: H.J. Lu Date: Wed, 15 Sep 2021 06:18:21 +0000 (+0800) Subject: x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY X-Git-Tag: basepoints/gcc-13~4698 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=48b3caffcacc99adf72ba1be189a7d9ebc4190be;p=thirdparty%2Fgcc.git x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY 1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP splitters. 2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP splitters. 3. Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY and TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY when handling avx_partial_xmm_update attribute. Don't convert AVX partial XMM register update if there is no partial SSE register dependency for SSE conversion. gcc/ * config/i386/i386-features.c (remove_partial_avx_dependency): Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY and TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY before generating vxorps. * config/i386/i386.h (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New. (TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. * config/i386/i386.md (SSE FP to FP splitters): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY. (SSE INT to FP splitter): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY. * config/i386/x86-tune.def (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New. (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. gcc/testsuite/ * gcc.target/i386/avx-covert-1.c: New file. * gcc.target/i386/avx-fp-covert-1.c: Likewise. * gcc.target/i386/avx-int-covert-1.c: Likewise. * gcc.target/i386/sse-covert-1.c: Likewise. * gcc.target/i386/sse-fp-covert-1.c: Likewise. * gcc.target/i386/sse-int-covert-1.c: Likewise. 
--- diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index a65f60122a5f..a525a83afd3a 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2222,12 +2222,14 @@ remove_partial_avx_dependency (void) { case E_SFmode: case E_DFmode: - if (TARGET_USE_VECTOR_FP_CONVERTS) + if (TARGET_USE_VECTOR_FP_CONVERTS + || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY) continue; break; case E_SImode: case E_DImode: - if (TARGET_USE_VECTOR_CONVERTS) + if (TARGET_USE_VECTOR_CONVERTS + || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY) continue; break; default: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e76bb55c080d..ec60b89753e6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -334,6 +334,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY] #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \ ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY] +#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \ + ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY] +#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \ + ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY] #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \ ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 13f6f57cdcce..c82a9dc1f670 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4535,7 +4535,8 @@ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] "!TARGET_AVX - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY + && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) @@ -4708,7 +4709,8 @@ (float_truncate:SF (match_operand:DF 1 
"nonimmediate_operand")))] "!TARGET_AVX - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY + && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) @@ -5243,7 +5245,8 @@ [(set (match_operand:MODEF 0 "sse_reg_operand") (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] "!TARGET_AVX - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY + && epilogue_completed && optimize_function_for_speed_p (cfun) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 088edb6c4ca7..58e8ead56b4b 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -64,6 +64,21 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC) +/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids + partial write to the destination in scalar SSE conversion from FP + to FP. */ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, + "sse_partial_reg_fp_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 + | m_BDVER | m_ZNVER | m_GENERIC) + +/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial + write to the destination in scalar SSE conversion from integer to FP. 
*/ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + "sse_partial_reg_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 + | m_BDVER | m_ZNVER | m_GENERIC) + /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies are resolved on SSE register parts instead of whole registers, so we may maintain just lower part of scalar values in proper format leaving the diff --git a/gcc/testsuite/gcc.target/i386/avx-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-covert-1.c new file mode 100644 index 000000000000..b6c794ecbb8d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-covert-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vcvtps2pd" } } */ +/* { dg-final { scan-assembler-not "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c new file mode 100644 index 000000000000..c40c48b1b2d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */ + +extern float f; +extern double d; + +void +foo (void) +{ + d = f; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-not "vcvtps2pd" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c new file mode 100644 index 000000000000..01bb64e66ccb --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern int i; + +void +foo (void) +{ + f = i; +} + +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c new file mode 100644 index 000000000000..c30af6945058 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "cvtss2sd" } } */ +/* { dg-final { scan-assembler "cvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "cvtps2pd" } } */ +/* { dg-final { scan-assembler-not "cvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c new file mode 100644 index 000000000000..b6567e60e3e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */ + +extern float f; +extern double d; + +void +foo (void) +{ + d = f; +} + +/* { dg-final { scan-assembler "cvtss2sd" } } */ +/* { dg-final { scan-assembler-not "cvtps2pd" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c new file mode 100644 index 000000000000..107f7241def0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c @@ -0,0 +1,14 
@@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern int i; + +void +foo (void) +{ + f = i; +} + +/* { dg-final { scan-assembler "cvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */