From: liuhongt Date: Tue, 22 Aug 2023 10:18:31 +0000 (+0800) Subject: Fix target_clone ("arch=graniterapids-d") and target_clone ("arch=arrowlake-s") X-Git-Tag: basepoints/gcc-15~6720 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=afe15e9742d9fefb3f4a9b1662cb3f977e3645fd;p=thirdparty%2Fgcc.git Fix target_clone ("arch=graniterapids-d") and target_clone ("arch=arrowlake-s") Both "graniterapid-d" and "graniterapids" are attached with PROCESSOR_GRANITERAPID in processor_alias_table but mapped to different __cpu_subtype in get_intel_cpu. And get_builtin_code_for_version will try to match the first PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to "granitepraids" here. 861 else if (new_target->arch_specified && new_target->arch > 0) 1862 for (i = 0; i < pta_size; i++) 1863 if (processor_alias_table[i].processor == new_target->arch) 1864 { 1865 const pta *arch_info = &processor_alias_table[i]; 1866 switch (arch_info->priority) 1867 { 1868 default: 1869 arg_str = arch_info->name; This mismatch makes dispatch_function_versions check the preidcate of__builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes the issue. The patch explicitly adds PROCESSOR_ARROWLAKE_S and PROCESSOR_GRANITERAPIDS_D to make a distinction. For "alderlake","raptorlake", "meteorlake" they share same isa, cost, tuning, and mapped to the same __cpu_type/__cpu_subtype in get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others. gcc/ChangeLog: * common/config/i386/i386-common.cc (processor_names): Add new member graniterapids-s and arrowlake-s. * config/i386/i386-options.cc (processor_alias_table): Update table with PROCESSOR_ARROWLAKE_S and PROCESSOR_GRANITERAPIDS_D. (m_GRANITERAPID_D): New macro. (m_ARROWLAKE_S): Ditto. (m_CORE_AVX512): Add m_GRANITERAPIDS_D. (processor_cost_table): Add icelake_cost for PROCESSOR_GRANITERAPIDS_D and alderlake_cost for PROCESSOR_ARROWLAKE_S. * config/i386/x86-tune.def: Hanlde m_ARROWLAKE_S same as m_ARROWLAKE. * config/i386/i386.h (enum processor_type): Add new member PROCESSOR_GRANITERAPIDS_D and PROCESSOR_ARROWLAKE_S. * config/i386/i386-c.cc (ix86_target_macros_internal): Handle PROCESSOR_GRANITERAPIDS_D and PROCESSOR_ARROWLAKE_S --- diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 12a01704a73d..1e11163004b7 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -2155,7 +2155,9 @@ const char *const processor_names[] = "alderlake", "rocketlake", "graniterapids", + "graniterapids-d", "arrowlake", + "arrowlake-s", "intel", "lujiazui", "geode", @@ -2279,13 +2281,14 @@ const pta processor_alias_table[] = M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2}, {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F}, - {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D, - M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F}, + {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL, + PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), + P_PROC_AVX512F}, {"arrowlake", PROCESSOR_ARROWLAKE, CPU_HASWELL, PTA_ARROWLAKE, M_CPU_SUBTYPE (INTEL_COREI7_ARROWLAKE), P_PROC_AVX2}, - {"arrowlake-s", PROCESSOR_ARROWLAKE, CPU_HASWELL, PTA_ARROWLAKE_S, + {"arrowlake-s", PROCESSOR_ARROWLAKE_S, CPU_HASWELL, PTA_ARROWLAKE_S, M_CPU_SUBTYPE (INTEL_COREI7_ARROWLAKE_S), P_PROC_AVX2}, - {"lunarlake", PROCESSOR_ARROWLAKE, CPU_HASWELL, PTA_ARROWLAKE_S, + {"lunarlake", PROCESSOR_ARROWLAKE_S, CPU_HASWELL, PTA_ARROWLAKE_S, M_CPU_SUBTYPE (INTEL_COREI7_ARROWLAKE_S), P_PROC_AVX2}, {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3}, diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index caef5531593d..0e11709ebc52 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -258,6 +258,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__graniterapids"); def_or_undef (parse_in, "__graniterapids__"); break; + case PROCESSOR_GRANITERAPIDS_D: + def_or_undef (parse_in, "__graniterapids_d"); + def_or_undef (parse_in, "__graniterapids_d__"); + break; case PROCESSOR_ALDERLAKE: def_or_undef (parse_in, "__alderlake"); def_or_undef (parse_in, "__alderlake__"); @@ -270,6 +274,11 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__arrowlake"); def_or_undef (parse_in, "__arrowlake__"); break; + case PROCESSOR_ARROWLAKE_S: + def_or_undef (parse_in, "__arrowlake_s"); + def_or_undef (parse_in, "__arrowlake_s__"); + break; + /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: break; @@ -451,9 +460,15 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_GRANITERAPIDS: def_or_undef (parse_in, "__tune_graniterapids__"); break; + case PROCESSOR_GRANITERAPIDS_D: + def_or_undef (parse_in, "__tune_graniterapids_d__"); + break; case PROCESSOR_ARROWLAKE: def_or_undef (parse_in, "__tune_arrowlake__"); break; + case PROCESSOR_ARROWLAKE_S: + def_or_undef (parse_in, "__tune_arrowlake_s__"); + break; case PROCESSOR_INTEL: case PROCESSOR_GENERIC: break; diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index f48112d4aa6e..9af4f9101430 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -127,10 +127,11 @@ along with GCC; see the file COPYING3. If not see #define m_ALDERLAKE (HOST_WIDE_INT_1U<> (W-1) ^ x) - @@ -385,7 +386,7 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT - | m_ALDERLAKE | m_ARROWLAKE | m_CORE_ATOM + | m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC)) /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ @@ -396,7 +397,7 @@ DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ARROWLAKE - | m_CORE_ATOM | m_GENERIC) + | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC) /*****************************************************************************/ /* SSE instruction selection tuning */ @@ -412,7 +413,7 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_AMDFAM10 | m_BDVER + | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_LUJIAZUI | m_GENERIC) /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores @@ -420,7 +421,7 @@ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_BDVER | m_ZNVER + | m_ARROWLAKE | m_ARROWLAKE_S| m_CORE_ATOM | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC) /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single @@ -431,13 +432,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE | m_LUJIAZUI | m_CORE_ALL | m_TREMONT | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC) + | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC) /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to xorps/xorpd and other variants. */ DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER - | m_LUJIAZUI | m_TREMONT | m_ALDERLAKE | m_ARROWLAKE + | m_LUJIAZUI | m_TREMONT | m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC) /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer @@ -485,13 +486,13 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_INTEL) + | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_INTEL) /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC | m_GDS)) + | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC | m_GDS)) /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 elements. */ @@ -502,7 +503,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE - | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC | m_GDS)) + | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC | m_GDS)) /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 elements. */ @@ -513,7 +514,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE - | m_CORE_ATOM | m_GENERIC | m_GDS)) + | m_ARROWLAKE_S | m_CORE_ATOM | m_GENERIC | m_GDS)) /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more elements. */ @@ -527,7 +528,7 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or smaller FMA chain. */ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 - | m_ALDERLAKE | m_ARROWLAKE | m_SAPPHIRERAPIDS + | m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S | m_SAPPHIRERAPIDS | m_CORE_ATOM) /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or @@ -572,13 +573,13 @@ DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4) /* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces", - m_ALDERLAKE | m_ARROWLAKE | m_CORE_AVX2 | m_ZNVER1 + m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_AVX2 | m_ZNVER1 | m_ZNVER2 | m_ZNVER3) /* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", - m_ALDERLAKE | m_ARROWLAKE | m_CORE_AVX2 | m_ZNVER1 + m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S | m_CORE_AVX2 | m_ZNVER1 | m_ZNVER2 | m_ZNVER3) /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit