x86: Update -mtune=intel for Diamond Rapids/Clearwater Forest

author H.J. Lu <hjl.tools@gmail.com>

Tue, 24 Jun 2025 23:40:31 +0000 (07:40 +0800)

committer H.J. Lu <hjl.tools@gmail.com>

Wed, 25 Jun 2025 05:21:21 +0000 (13:21 +0800)
author H.J. Lu <hjl.tools@gmail.com>
Tue, 24 Jun 2025 23:40:31 +0000 (07:40 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Wed, 25 Jun 2025 05:21:21 +0000 (13:21 +0800)
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc

index 64908ce740a98da193335a67a44019be8bedff75..dfcd4e9a7276918493167bf9ad5c3523ef3c99e5 100644 (file)
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2310,7 +2310,7 @@ const pta processor_alias_table[] =
      M_CPU_TYPE (INTEL_GRANDRIDGE), P_PROC_AVX2},
    {"clearwaterforest", PROCESSOR_CLEARWATERFOREST, CPU_HASWELL,
      PTA_CLEARWATERFOREST, M_CPU_TYPE (INTEL_CLEARWATERFOREST), P_PROC_AVX2},
-  {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM,
+  {"intel", PROCESSOR_INTEL, CPU_HASWELL, PTA_HASWELL,
      M_VENDOR (VENDOR_INTEL), P_NONE},
    {"geode", PROCESSOR_GEODE, CPU_GEODE,
      PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE, 0, P_NONE},
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc

index d1e321ad74b1e197ee0fc5f0cd99ec5d7ff00b7c..27feeddaf8120931f46ffa4fe07e443def24be6c 100644 (file)
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -797,7 +797,7 @@ static const struct processor_costs *processor_cost_table[] =
    &alderlake_cost,     /* PROCESSOR_ARROWLAKE_S.       */
    &alderlake_cost,     /* PROCESSOR_PANTHERLAKE.       */
    &icelake_cost,       /* PROCESSOR_DIAMONDRAPIDS.     */
-  &intel_cost,         /* PROCESSOR_INTEL.             */
+  &alderlake_cost,     /* PROCESSOR_INTEL.             */
    &lujiazui_cost,      /* PROCESSOR_LUJIAZUI.          */
    &yongfeng_cost,      /* PROCESSOR_YONGFENG.          */
    &shijidadao_cost,    /* PROCESSOR_SHIJIDADAO.        */
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h

index a5b99d1f9629b21ae4cd7729e1c2320de96bf1a5..c8603b982af48f0181a35d13203acbc2c9c34af6 100644 (file)
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -3568,127 +3568,6 @@ struct processor_costs tremont_cost = {
    COSTS_N_INSNS (2),                   /* Branch mispredict scale.  */
  };
  
-static stringop_algs intel_memcpy[2] = {
-  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
-  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
-             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs intel_memset[2] = {
-  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
-             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
-  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
-             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs intel_cost = {
-  {
-  /* Start of register allocator costs.  integer->integer move cost is 2. */
-  6,                                /* cost for loading QImode using movzbl */
-  {4, 4, 4},                           /* cost of loading integer registers
-                                          in QImode, HImode and SImode.
-                                          Relative to reg-reg move (2).  */
-  {6, 6, 6},                           /* cost of storing integer registers */
-  2,                                   /* cost of reg,reg fld/fst */
-  {6, 6, 8},                           /* cost of loading fp registers
-                                          in SFmode, DFmode and XFmode */
-  {6, 6, 10},                          /* cost of storing fp registers
-                                          in SFmode, DFmode and XFmode */
-  2,                                   /* cost of moving MMX register */
-  {6, 6},                              /* cost of loading MMX registers
-                                          in SImode and DImode */
-  {6, 6},                              /* cost of storing MMX registers
-                                          in SImode and DImode */
-  2, 2, 2,                             /* cost of moving XMM,YMM,ZMM register */
-  {6, 6, 6, 6, 6},                     /* cost of loading SSE registers
-                                          in 32,64,128,256 and 512-bit */
-  {6, 6, 6, 6, 6},                     /* cost of storing SSE registers
-                                          in 32,64,128,256 and 512-bit */
-  4, 4,                                /* SSE->integer and integer->SSE moves */
-  4, 4,                                /* mask->integer and integer->mask moves */
-  {4, 4, 4},                           /* cost of loading mask register
-                                          in QImode, HImode, SImode.  */
-  {6, 6, 6},                           /* cost if storing mask register
-                                          in QImode, HImode, SImode.  */
-  2,                                   /* cost of moving mask register.  */
-  /* End of register allocator costs.  */
-  },
-
-  COSTS_N_INSNS (1),                   /* cost of an add instruction */
-  COSTS_N_INSNS (1) + 1,               /* cost of a lea instruction */
-  COSTS_N_INSNS (1),                   /* variable shift costs */
-  COSTS_N_INSNS (1),                   /* constant shift costs */
-  {COSTS_N_INSNS (3),                  /* cost of starting multiply for QI */
-   COSTS_N_INSNS (3),                  /*                               HI */
-   COSTS_N_INSNS (3),                  /*                               SI */
-   COSTS_N_INSNS (4),                  /*                               DI */
-   COSTS_N_INSNS (2)},                 /*                            other */
-  0,                                   /* cost of multiply per each bit set */
-  {COSTS_N_INSNS (18),                 /* cost of a divide/mod for QI */
-   COSTS_N_INSNS (26),                 /*                          HI */
-   COSTS_N_INSNS (42),                 /*                          SI */
-   COSTS_N_INSNS (74),                 /*                          DI */
-   COSTS_N_INSNS (74)},                        /*                          other */
-  COSTS_N_INSNS (1),                   /* cost of movsx */
-  COSTS_N_INSNS (1),                   /* cost of movzx */
-  8,                                   /* "large" insn */
-  17,                                  /* MOVE_RATIO */
-  6,                                   /* CLEAR_RATIO */
-  {4, 4, 4},                           /* cost of loading integer registers
-                                          in QImode, HImode and SImode.
-                                          Relative to reg-reg move (2).  */
-  {6, 6, 6},                           /* cost of storing integer registers */
-  {6, 6, 6, 6, 6},                     /* cost of loading SSE register
-                                          in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {6, 6, 6, 6, 6},                     /* cost of storing SSE register
-                                          in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {10, 10, 10, 10, 10},                        /* cost of unaligned loads.  */
-  {10, 10, 10, 10, 10},                        /* cost of unaligned loads.  */
-  2, 2, 2,                             /* cost of moving XMM,YMM,ZMM register */
-  4,                                   /* cost of moving SSE register to integer.  */
-  4,                                   /* cost of moving integer register to SSE.  */
-  6, 6,                                        /* Gather load static, per_elt.  */
-  6, 6,                                        /* Gather store static, per_elt.  */
-  32,                                  /* size of l1 cache.  */
-  256,                                 /* size of l2 cache.  */
-  64,                                  /* size of prefetch block */
-  6,                                   /* number of parallel prefetches */
-  3,                                   /* Branch cost */
-  COSTS_N_INSNS (8),                   /* cost of FADD and FSUB insns.  */
-  COSTS_N_INSNS (8),                   /* cost of FMUL instruction.  */
-  COSTS_N_INSNS (20),                  /* cost of FDIV instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of FABS instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of FCHS instruction.  */
-  COSTS_N_INSNS (40),                  /* cost of FSQRT instruction.  */
-
-  COSTS_N_INSNS (1),                   /* cost of cheap SSE instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
-  COSTS_N_INSNS (8),                   /* cost of MULSS instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of MULSD instruction.  */
-  COSTS_N_INSNS (6),                   /* cost of FMA SS instruction.  */
-  COSTS_N_INSNS (6),                   /* cost of FMA SD instruction.  */
-  COSTS_N_INSNS (20),                  /* cost of DIVSS instruction.  */
-  COSTS_N_INSNS (20),                  /* cost of DIVSD instruction.  */
-  COSTS_N_INSNS (40),                  /* cost of SQRTSS instruction.  */
-  COSTS_N_INSNS (40),                  /* cost of SQRTSD instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of CVTSS2SD etc.  */
-  COSTS_N_INSNS (16),                  /* cost of 256bit VCVTPS2PD etc.  */
-  COSTS_N_INSNS (32),                  /* cost of 512bit VCVTPS2PD etc.  */
-  COSTS_N_INSNS (8),                   /* cost of CVTSI2SS instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of CVT(T)SS2SI instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of CVTPI2PS instruction.  */
-  COSTS_N_INSNS (8),                   /* cost of CVT(T)PS2PI instruction.  */
-  1, 4, 1, 1,                          /* reassoc int, fp, vec_int, vec_fp.  */
-  intel_memcpy,
-  intel_memset,
-  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
-  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
-  "16",                                        /* Loop alignment.  */
-  "16:8:8",                            /* Jump alignment.  */
-  "0:0:8",                             /* Label alignment.  */
-  "16",                                        /* Func alignment.  */
-  4,                                   /* Small unroll limit.  */
-  2,                                   /* Small unroll factor.  */
-  COSTS_N_INSNS (2),                   /* Branch mispredict scale.  */
-};
-
  /* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU.  */
  static stringop_algs lujiazui_memcpy[2] = {
    {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc

index 61b1a2686ad24e378bb4a25ffb0cfc6c9b333cc2..ff9c2683007f92f26d8bc6e4dd2804199c872a59 100644 (file)
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -45,7 +45,6 @@ ix86_issue_rate (void)
      case PROCESSOR_LAKEMONT:
      case PROCESSOR_BONNELL:
      case PROCESSOR_SILVERMONT:
-    case PROCESSOR_INTEL:
      case PROCESSOR_K6:
      case PROCESSOR_BTVER2:
      case PROCESSOR_PENTIUM4:
@@ -81,6 +80,7 @@ ix86_issue_rate (void)
      case PROCESSOR_YONGFENG:
      case PROCESSOR_SHIJIDADAO:
      case PROCESSOR_SIERRAFOREST:
+    case PROCESSOR_INTEL:
      case PROCESSOR_GENERIC:
      /* For znver5 decoder can handle 4 or 8 instructions per cycle,
         op cache 12 instruction/cycle, dispatch 8 instructions
@@ -497,6 +497,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
      case PROCESSOR_HASWELL:
      case PROCESSOR_TREMONT:
      case PROCESSOR_ALDERLAKE:
+    case PROCESSOR_INTEL:
      case PROCESSOR_GENERIC:
        /* Stack engine allows to execute push&pop instructions in parall.  */
        if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
@@ -519,7 +520,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
        break;
  
      case PROCESSOR_SILVERMONT:
-    case PROCESSOR_INTEL:
        if (!reload_completed)
         return cost;
  
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 100bdaf904517acef07c4cbbb176bc3fe04c31f4..b83818337c571ae22f2e5beb40921c05ffa55d7a 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -35227,8 +35227,8 @@ Produce code optimized for the most current Intel processors, which are
  Haswell and Silvermont for this version of GCC.  If you know the CPU
  on which your code will run, then you should use the corresponding
  @option{-mtune} or @option{-march} option instead of @option{-mtune=intel}.
-But, if you want your application performs better on both Haswell and
-Silvermont, then you should use this option.
+But, if you want your application to perform better on both Diamond Rapids
+and Clearwater Forest, then you should use this option.
  
  As new Intel processors are deployed in the marketplace, the behavior of
  this option will change.  Therefore, if you upgrade to a newer version of
author	H.J. Lu <hjl.tools@gmail.com>
	Tue, 24 Jun 2025 23:40:31 +0000 (07:40 +0800)
committer	H.J. Lu <hjl.tools@gmail.com>
	Wed, 25 Jun 2025 05:21:21 +0000 (13:21 +0800)
gcc/common/config/i386/i386-common.cc		patch \| blob \| blame \| history
gcc/config/i386/i386-options.cc		patch \| blob \| blame \| history
gcc/config/i386/x86-tune-costs.h		patch \| blob \| blame \| history
gcc/config/i386/x86-tune-sched.cc		patch \| blob \| blame \| history
gcc/doc/invoke.texi		patch \| blob \| blame \| history