]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Enable AMD znver4 support and add instruction reservations
authorTejas Joshi <TejasSanjay.Joshi@amd.com>
Tue, 28 Jun 2022 11:03:53 +0000 (16:33 +0530)
committerVenkataramanan Kumar <Venkataramanan.Kumar@amd.com>
Fri, 21 Oct 2022 09:55:57 +0000 (15:25 +0530)
2022-09-28  Tejas Joshi <TejasSanjay.Joshi@amd.com>

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver4.
* common/config/i386/i386-common.cc (processor_names): Add znver4.
(processor_alias_table): Add znver4 and modularize old znvers.
* common/config/i386/i386-cpuinfo.h (processor_subtypes):
Add AMDFAM19H_ZNVER4.
* config.gcc (x86_64-*-* |...): Likewise.
* config/i386/driver-i386.cc (host_detect_local_cpu): Let
-march=native recognize znver4 cpus.
* config/i386/i386-c.cc (ix86_target_macros_internal): Add znver4.
* config/i386/i386-options.cc (m_ZNVER4): New definition.
(m_ZNVER): Include m_ZNVER4.
(processor_cost_table): Add znver4.
* config/i386/i386.cc (ix86_reassociation_width): Likewise.
* config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER4.
(PTA_ZNVER1): New definition.
(PTA_ZNVER2): Likewise.
(PTA_ZNVER3): Likewise.
(PTA_ZNVER4): Likewise.
* config/i386/i386.md (define_attr "cpu"): Add znver4 and rename
md file.
* config/i386/x86-tune-costs.h (znver4_cost): New definition.
* config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver4.
(ix86_adjust_cost): Likewise.
* config/i386/znver1.md: Rename to znver.md.
* config/i386/znver.md: Add new reservations for znver4.
* doc/extend.texi: Add details about znver4.
* doc/invoke.texi: Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/i386/funcspec-56.inc: Handle new march.
* g++.target/i386/mv29.C: Likewise.

17 files changed:
gcc/common/config/i386/cpuinfo.h
gcc/common/config/i386/i386-common.cc
gcc/common/config/i386/i386-cpuinfo.h
gcc/config.gcc
gcc/config/i386/driver-i386.cc
gcc/config/i386/i386-c.cc
gcc/config/i386/i386-options.cc
gcc/config/i386/i386.cc
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/config/i386/x86-tune-costs.h
gcc/config/i386/x86-tune-sched.cc
gcc/config/i386/znver.md [moved from gcc/config/i386/znver1.md with 62% similarity]
gcc/doc/extend.texi
gcc/doc/invoke.texi
gcc/testsuite/g++.target/i386/mv29.C
gcc/testsuite/gcc.target/i386/funcspec-56.inc

index bed88003f8ef2397be5ec37e348d1bcac475d61e..d45451c5704270f124466ef829e6f971e4374510 100644 (file)
@@ -253,13 +253,27 @@ get_amd_cpu (struct __processor_model *cpu_model,
       break;
     case 0x19:
       cpu_model->__cpu_type = AMDFAM19H;
-      /* AMD family 19h version 1.  */
+      /* AMD family 19h.  */
       if (model <= 0x0f)
        {
          cpu = "znver3";
          CHECK___builtin_cpu_is ("znver3");
          cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
        }
+      else if ((model >= 0x10 && model <= 0x1f)
+               || (model >= 0x60 && model <= 0xaf))
+       {
+         cpu = "znver4";
+         CHECK___builtin_cpu_is ("znver4");
+         cpu_model->__cpu_subtype = AMDFAM19H_ZNVER4;
+       }
+      else if (has_cpu_feature (cpu_model, cpu_features2,
+                               FEATURE_AVX512F))
+       {
+         cpu = "znver4";
+         CHECK___builtin_cpu_is ("znver4");
+         cpu_model->__cpu_subtype = AMDFAM19H_ZNVER4;
+       }
       else if (has_cpu_feature (cpu_model, cpu_features2,
                                FEATURE_VAES))
        {
index 6a2a7e3d25a0819eb2195f45d273b4ac477936bc..4b01c3540e55a3dd1bd99bfef87ec11fad0d8e7e 100644 (file)
@@ -1868,7 +1868,8 @@ const char *const processor_names[] =
   "btver2",
   "znver1",
   "znver2",
-  "znver3"
+  "znver3",
+  "znver4"
 };
 
 /* Guarantee that the array is aligned with enum processor_type.  */
@@ -2104,37 +2105,17 @@ const pta processor_alias_table[] =
       | PTA_MOVBE | PTA_MWAITX,
     M_CPU_SUBTYPE (AMDFAM15H_BDVER4), P_PROC_AVX2},
   {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
-    PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-      | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-      | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
-      | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
-      | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
-      | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
-      | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
-      | PTA_SHA | PTA_LZCNT | PTA_POPCNT,
+    PTA_ZNVER1,
     M_CPU_SUBTYPE (AMDFAM17H_ZNVER1), P_PROC_AVX2},
   {"znver2", PROCESSOR_ZNVER2, CPU_ZNVER2,
-    PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-      | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-      | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
-      | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
-      | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
-      | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
-      | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
-      | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
-      | PTA_WBNOINVD,
+    PTA_ZNVER2,
     M_CPU_SUBTYPE (AMDFAM17H_ZNVER2), P_PROC_AVX2},
   {"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3,
-    PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-      | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-      | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
-      | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
-      | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
-      | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
-      | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
-      | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
-      | PTA_WBNOINVD | PTA_VAES | PTA_VPCLMULQDQ | PTA_PKU,
+    PTA_ZNVER3,
     M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2},
+  {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4,
+    PTA_ZNVER4,
+    M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F},
   {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
     PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
index 9a6b92fab799457233627f11d1bb309d7d32f8c6..9893fc422bcc255c09f935ad3a9a619bdfa46387 100644 (file)
@@ -92,6 +92,7 @@ enum processor_subtypes
   AMDFAM19H_ZNVER3,
   INTEL_COREI7_ROCKETLAKE,
   ZHAOXIN_FAM7H_LUJIAZUI,
+  AMDFAM19H_ZNVER4,
   CPU_SUBTYPE_MAX
 };
 
index a3d336e1f18f3efee51eb5dc28a41be5a594c557..160c52c542958a80fa8736def5c2120ae2dab773 100644 (file)
@@ -660,7 +660,7 @@ c7 esther"
 # 64-bit x86 processors supported by --with-arch=.  Each processor
 # MUST be separated by exactly one space.
 x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
-bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \
+bdver3 bdver4 znver1 znver2 znver3 znver4 btver1 btver2 k8 k8-sse3 opteron \
 opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \
 slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
 silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
@@ -3643,6 +3643,10 @@ case ${target} in
        arch=znver3
        cpu=znver3
        ;;
+      znver4-*)
+       arch=znver4
+       cpu=znver4
+       ;;
       bdver4-*)
         arch=bdver4
         cpu=bdver4
@@ -3771,6 +3775,10 @@ case ${target} in
       znver3-*)
        arch=znver3
        cpu=znver3
+       ;;
+         znver4-*)
+       arch=znver4
+       cpu=znver4
        ;;
       bdver4-*)
         arch=bdver4
index ef567045c6796b408c9d82772300ccb26bf2fa47..aa16895442f7575e19d37355237982261fa73867 100644 (file)
@@ -465,6 +465,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
        processor = PROCESSOR_GEODE;
       else if (has_feature (FEATURE_MOVBE) && family == 22)
        processor = PROCESSOR_BTVER2;
+      else if (has_feature (FEATURE_AVX512F))
+       processor = PROCESSOR_ZNVER4;
       else if (has_feature (FEATURE_VAES))
        processor = PROCESSOR_ZNVER3;
       else if (has_feature (FEATURE_CLWB))
@@ -779,6 +781,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
     case PROCESSOR_ZNVER3:
       cpu = "znver3";
       break;
+    case PROCESSOR_ZNVER4:
+      cpu = "znver4";
+      break;
     case PROCESSOR_BTVER1:
       cpu = "btver1";
       break;
index a9a35c0a18a64b3dcaefaec2f8bfd4481479c864..f70f8918da97201edfd651cc0465107a89699f61 100644 (file)
@@ -132,6 +132,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
       def_or_undef (parse_in, "__znver3");
       def_or_undef (parse_in, "__znver3__");
       break;
+    case PROCESSOR_ZNVER4:
+      def_or_undef (parse_in, "__znver4");
+      def_or_undef (parse_in, "__znver4__");
+      break;
     case PROCESSOR_BTVER1:
       def_or_undef (parse_in, "__btver1");
       def_or_undef (parse_in, "__btver1__");
@@ -330,6 +334,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     case PROCESSOR_ZNVER3:
       def_or_undef (parse_in, "__tune_znver3__");
       break;
+    case PROCESSOR_ZNVER4:
+      def_or_undef (parse_in, "__tune_znver4__");
+      break;
     case PROCESSOR_BTVER1:
       def_or_undef (parse_in, "__tune_btver1__");
       break;
index 3e6d04433a656a81743ce2ba64357f9a14c023c7..1f14d6917d8dc753ed31c9294b432912febba9c7 100644 (file)
@@ -154,11 +154,12 @@ along with GCC; see the file COPYING3.  If not see
 #define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
 #define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2)
 #define m_ZNVER3 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER3)
+#define m_ZNVER4 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER4)
 #define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
 #define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
 #define m_BDVER        (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
 #define m_BTVER (m_BTVER1 | m_BTVER2)
-#define m_ZNVER        (m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
+#define m_ZNVER        (m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4)
 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
                        | m_ZNVER)
 
@@ -773,7 +774,8 @@ static const struct processor_costs *processor_cost_table[] =
   &btver2_cost,
   &znver1_cost,
   &znver2_cost,
-  &znver3_cost
+  &znver3_cost,
+  &znver4_cost
 };
 
 /* Guarantee that the array is aligned with enum processor_type.  */
index 480db35f6cd6cbeea509ae4236f5794b4a00c422..aeea26ef4bee294e13af73e25f917d35c4123fdb 100644 (file)
@@ -23079,7 +23079,7 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
       /* Integer vector instructions execute in FP unit
         and can execute 3 additions and one multiplication per cycle.  */
       if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
-          || ix86_tune == PROCESSOR_ZNVER3)
+          || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
          && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
        return 1;
 
index 372a2cff8feb989799786d4a91dc53cad525d6ae..fd7c9df47e515836cb87a0eaafbc1fbe76f92f77 100644 (file)
@@ -2255,6 +2255,7 @@ enum processor_type
   PROCESSOR_ZNVER1,
   PROCESSOR_ZNVER2,
   PROCESSOR_ZNVER3,
+  PROCESSOR_ZNVER4,
   PROCESSOR_max
 };
 
@@ -2347,6 +2348,21 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
 constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+  | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
+  | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
+  | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
+  | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT
+  | PTA_POPCNT;
+constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID
+  | PTA_WBNOINVD;
+constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
+  | PTA_PKU;
+constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
+  | PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL
+  | PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
+  | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
 
 #ifndef GENERATOR_FILE
 
index 93538c5b3c6bdff16ffa8a296ce3c48fb7817f7b..baf1f1f8fa22166d95a812a11c7b8e3f1cf48540 100644 (file)
 ;; Processor type.
 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
                    atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
-                   bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3"
+                   bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
   (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
 (include "bdver1.md")
 (include "bdver3.md")
 (include "btver2.md")
-(include "znver1.md")
+(include "znver.md")
 (include "geode.md")
 (include "atom.md")
 (include "slm.md")
index 6c9066c84cc3b43c71ec8d92f1f852fe8d7c201a..aeaa7eb008e2e6776072c48069f63cae1ab90ec7 100644 (file)
@@ -1820,6 +1820,139 @@ struct processor_costs znver3_cost = {
   "16",                                        /* Func alignment.  */
 };
 
+/* This table currently replicates znver3_cost table. */
+struct processor_costs znver4_cost = {
+  {
+  /* Start of register allocator costs.  integer->integer move cost is 2. */
+
+  /* reg-reg moves are done by renaming and thus they are even cheaper than
+     1 cycle.  Because reg-reg move cost is 2 and following tables correspond
+     to doubles of latencies, we do not model this correctly.  It does not
+     seem to make practical difference to bump prices up even more.  */
+  6,                                   /* cost for loading QImode using
+                                          movzbl.  */
+  {6, 6, 6},                           /* cost of loading integer registers
+                                          in QImode, HImode and SImode.
+                                          Relative to reg-reg move (2).  */
+  {8, 8, 8},                           /* cost of storing integer
+                                          registers.  */
+  2,                                   /* cost of reg,reg fld/fst.  */
+  {6, 6, 16},                          /* cost of loading fp registers
+                                          in SFmode, DFmode and XFmode.  */
+  {8, 8, 16},                          /* cost of storing fp registers
+                                          in SFmode, DFmode and XFmode.  */
+  2,                                   /* cost of moving MMX register.  */
+  {6, 6},                              /* cost of loading MMX registers
+                                          in SImode and DImode.  */
+  {8, 8},                              /* cost of storing MMX registers
+                                          in SImode and DImode.  */
+  2, 2, 3,                             /* cost of moving XMM,YMM,ZMM
+                                          register.  */
+  {6, 6, 6, 6, 12},                    /* cost of loading SSE registers
+                                          in 32,64,128,256 and 512-bit.  */
+  {8, 8, 8, 8, 16},                    /* cost of storing SSE registers
+                                          in 32,64,128,256 and 512-bit.  */
+  6, 6,                                        /* SSE->integer and integer->SSE
+                                          moves.  */
+  8, 8,                                /* mask->integer and integer->mask moves */
+  {6, 6, 6},                           /* cost of loading mask register
+                                          in QImode, HImode, SImode.  */
+  {8, 8, 8},                           /* cost of storing mask register
+                                          in QImode, HImode, SImode.  */
+  2,                                   /* cost of moving mask register.  */
+  /* End of register allocator costs.  */
+  },
+
+  COSTS_N_INSNS (1),                   /* cost of an add instruction.  */
+  COSTS_N_INSNS (1),                   /* cost of a lea instruction.  */
+  COSTS_N_INSNS (1),                   /* variable shift costs.  */
+  COSTS_N_INSNS (1),                   /* constant shift costs.  */
+  {COSTS_N_INSNS (3),                  /* cost of starting multiply for QI.  */
+   COSTS_N_INSNS (3),                  /*                               HI.  */
+   COSTS_N_INSNS (3),                  /*                               SI.  */
+   COSTS_N_INSNS (3),                  /*                               DI.  */
+   COSTS_N_INSNS (3)},                 /*                      other.  */
+  0,                                   /* cost of multiply per each bit
+                                          set.  */
+  {COSTS_N_INSNS (9),                  /* cost of a divide/mod for QI.  */
+   COSTS_N_INSNS (10),                 /*                          HI.  */
+   COSTS_N_INSNS (12),                 /*                          SI.  */
+   COSTS_N_INSNS (17),                 /*                          DI.  */
+   COSTS_N_INSNS (17)},                        /*                          other.  */
+  COSTS_N_INSNS (1),                   /* cost of movsx.  */
+  COSTS_N_INSNS (1),                   /* cost of movzx.  */
+  8,                                   /* "large" insn.  */
+  9,                                   /* MOVE_RATIO.  */
+  6,                                   /* CLEAR_RATIO */
+  {6, 6, 6},                           /* cost of loading integer registers
+                                          in QImode, HImode and SImode.
+                                          Relative to reg-reg move (2).  */
+  {8, 8, 8},                           /* cost of storing integer
+                                          registers.  */
+  {6, 6, 6, 6, 12},                    /* cost of loading SSE registers
+                                          in 32bit, 64bit, 128bit, 256bit and 512bit */
+  {8, 8, 8, 8, 16},                    /* cost of storing SSE register
+                                          in 32bit, 64bit, 128bit, 256bit and 512bit */
+  {6, 6, 6, 6, 12},                    /* cost of unaligned loads.  */
+  {8, 8, 8, 8, 16},                    /* cost of unaligned stores.  */
+  2, 2, 3,                             /* cost of moving XMM,YMM,ZMM
+                                          register.  */
+  6,                                   /* cost of moving SSE register to integer.  */
+  /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
+     throughput 9.  Approx 7 uops do not depend on vector size and every load
+     is 4 uops.  */
+  14, 8,                               /* Gather load static, per_elt.  */
+  14, 10,                              /* Gather store static, per_elt.  */
+  32,                                  /* size of l1 cache.  */
+  512,                                 /* size of l2 cache.  */
+  64,                                  /* size of prefetch block.  */
+  /* New AMD processors never drop prefetches; if they cannot be performed
+     immediately, they are queued.  We set number of simultaneous prefetches
+     to a large constant to reflect this (it probably is not a good idea not
+     to limit number of prefetches at all, as their execution also takes some
+     time).  */
+  100,                                 /* number of parallel prefetches.  */
+  3,                                   /* Branch cost.  */
+  COSTS_N_INSNS (5),                   /* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (5),                   /* cost of FMUL instruction.  */
+  /* Latency of fdiv is 8-15.  */
+  COSTS_N_INSNS (15),                  /* cost of FDIV instruction.  */
+  COSTS_N_INSNS (1),                   /* cost of FABS instruction.  */
+  COSTS_N_INSNS (1),                   /* cost of FCHS instruction.  */
+  /* Latency of fsqrt is 4-10.  */
+  COSTS_N_INSNS (10),                  /* cost of FSQRT instruction.  */
+
+  COSTS_N_INSNS (1),                   /* cost of cheap SSE instruction.  */
+  COSTS_N_INSNS (3),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (3),                   /* cost of MULSS instruction.  */
+  COSTS_N_INSNS (3),                   /* cost of MULSD instruction.  */
+  COSTS_N_INSNS (5),                   /* cost of FMA SS instruction.  */
+  COSTS_N_INSNS (5),                   /* cost of FMA SD instruction.  */
+  COSTS_N_INSNS (10),                  /* cost of DIVSS instruction.  */
+  /* 9-13.  */
+  COSTS_N_INSNS (13),                  /* cost of DIVSD instruction.  */
+  COSTS_N_INSNS (10),                  /* cost of SQRTSS instruction.  */
+  COSTS_N_INSNS (15),                  /* cost of SQRTSD instruction.  */
+  /* Zen can execute 4 integer operations per cycle.  FP operations
+     take 3 cycles and it can execute 2 integer additions and 2
+     multiplications thus reassociation may make sense up to width of 6.
+     SPEC2k6 benchmarks suggest
+     that 4 works better than 6 probably due to register pressure.
+
+     Integer vector operations are taken by FP unit and execute 3 vector
+     plus/minus operations per cycle but only one multiply.  This is adjusted
+     in ix86_reassociation_width.  */
+  4, 4, 3, 6,                          /* reassoc int, fp, vec_int, vec_fp.  */
+  znver2_memcpy,
+  znver2_memset,
+  COSTS_N_INSNS (4),                   /* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),                   /* cond_not_taken_branch_cost.  */
+  "16",                                        /* Loop alignment.  */
+  "16",                                        /* Jump alignment.  */
+  "0:0:8",                             /* Label alignment.  */
+  "16",                                        /* Func alignment.  */
+};
+
 /* skylake_cost should produce code tuned for Skylake familly of CPUs.  */
 static stringop_algs skylake_memcpy[2] =   {
   {libcall,
index e2765f81902df13139be487a092c61923da7c0e4..96eb06a5b6dd67e13ad02609fb9b39add7dc3c28 100644 (file)
@@ -68,6 +68,7 @@ ix86_issue_rate (void)
     case PROCESSOR_ZNVER1:
     case PROCESSOR_ZNVER2:
     case PROCESSOR_ZNVER3:
+    case PROCESSOR_ZNVER4:
     case PROCESSOR_CORE2:
     case PROCESSOR_NEHALEM:
     case PROCESSOR_SANDYBRIDGE:
@@ -415,6 +416,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
     case PROCESSOR_ZNVER1:
     case PROCESSOR_ZNVER2:
     case PROCESSOR_ZNVER3:
+    case PROCESSOR_ZNVER4:
       /* Stack engine allows to execute push&pop instructions in parall.  */
       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
similarity index 62%
rename from gcc/config/i386/znver1.md
rename to gcc/config/i386/znver.md
index 9c25b4e27c34f25a34d89ecdb087af9e005365d5..376a145b95edd7e0d26d7a3a8cc166b4da69752a 100644 (file)
@@ -23,8 +23,8 @@
 
 ;; AMD znver1, znver2 and znver3 Scheduling
 ;; Modeling automatons for zen decoders, integer execution pipes,
-;; AGU pipes and floating point execution units.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
+;; AGU pipes, floating point execution, branch and store units.
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver4_bru, znver4_fp_store")
 
 ;; Decoders unit has 4 decoders and all of them can decode fast path
 ;; and vector type instructions.
@@ -63,6 +63,8 @@
 ;; Load is 4 cycles. We do not model reservation of load unit.
 ;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing")
 (define_reservation "znver1-load" "znver1-agu-reserve")
+;; According to the manual, all AGUs are used for loads and stores in znver4.
+(define_reservation "znver4-load" "znver2-store-agu-reserve")
 ;; Store operations differs between znver1, znver2 and znver3 because extra AGU
 ;; was added.
 (define_reservation "znver1-store" "znver1-agu-reserve")
                                      +znver1-fp2+znver1-fp3
                                      +znver1-agu0+znver1-agu1+znver2-agu2")
 
+;; znver4 has one branch unit in znver1-ieu0 and a separate branch unit.
+(define_cpu_unit "znver4-bru0" "znver4_bru")
+;; znver4 also has a dedicated fp-store unit.
+(define_cpu_unit "znver4-fp-store0" "znver4_fp_store")
+
 ;; Call instruction
 (define_insn_reservation "znver1_call" 1
                         (and (eq_attr "cpu" "znver1")
                              (eq_attr "type" "call,callv"))
                         "znver1-double,znver2-store,znver1-ieu0|znver1-ieu3")
 
+(define_insn_reservation "znver4_call" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (eq_attr "type" "call,callv"))
+                        "znver1-double,znver1-ieu0|znver4-bru0,znver2-store")
+
 ;; General instructions
 (define_insn_reservation "znver1_push" 1
                         (and (eq_attr "cpu" "znver1")
                                   (eq_attr "memory" "store")))
                         "znver1-direct,znver1-store")
 (define_insn_reservation "znver2_push" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "type" "push")
                                   (eq_attr "memory" "store")))
                         "znver1-direct,znver2-store")
                              (and (eq_attr "type" "push")
                                   (eq_attr "memory" "both")))
                         "znver1-direct,znver1-load,znver2-store")
+(define_insn_reservation "znver4_push_load" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "push")
+                                  (eq_attr "memory" "both")))
+                        "znver1-direct,znver4-load,znver2-store")
 
 (define_insn_reservation "znver1_pop" 4
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "pop")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load")
+(define_insn_reservation "znver4_pop" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "pop")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load")
 
 (define_insn_reservation "znver1_pop_mem" 4
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "type" "pop")
                                   (eq_attr "memory" "both")))
                         "znver1-direct,znver1-load,znver2-store")
+(define_insn_reservation "znver4_pop_mem" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "pop")
+                                  (eq_attr "memory" "both")))
+                        "znver1-direct,znver4-load,znver2-store")
 
 ;; Leave
 (define_insn_reservation "znver1_leave" 1
                              (eq_attr "type" "leave"))
                         "znver1-double,znver1-ieu, znver1-store")
 (define_insn_reservation "znver2_leave" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (eq_attr "type" "leave"))
                         "znver1-double,znver1-ieu, znver2-store")
 
                              (and (eq_attr "type" "imul")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-ieu1")
+(define_insn_reservation "znver4_imul" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "imul")
+                                  (and (eq_attr "mode" "SI,HI,QI")
+                                  (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-ieu1")
+(define_insn_reservation "znver4_imul_DI" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "imul")
+                                  (and (eq_attr "mode" "DI")
+                                  (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-ieu1")
 
 (define_insn_reservation "znver1_imul_mem" 7
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "imul")
                                   (eq_attr "memory" "!none")))
                         "znver1-direct,znver1-load, znver1-ieu1")
+(define_insn_reservation "znver4_imul_mem" 7
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "imul")
+                                  (eq_attr "memory" "!none")))
+                        "znver1-direct,znver4-load, znver1-ieu1")
 
 ;; Divisions
 ;; Reg operands
                               (and (eq_attr "type" "idiv")
                                    (and (eq_attr "mode" "DI")
                                         (eq_attr "memory" "load"))))
-                         "znver1-double,znver1-load,znver1-ieu2*22")
+                         "znver1-double,znver1-load,znver1-ieu2*18")
 
 (define_insn_reservation "znver3_idiv_mem_SI" 16
                          (and (eq_attr "cpu" "znver3")
                               (and (eq_attr "type" "idiv")
                                    (and (eq_attr "mode" "SI")
                                         (eq_attr "memory" "load"))))
-                         "znver1-double,znver1-load,znver1-ieu2*16")
+                         "znver1-double,znver1-load,znver1-ieu2*12")
 
 (define_insn_reservation "znver3_idiv_mem_HI" 14
                          (and (eq_attr "cpu" "znver3")
                                         (eq_attr "memory" "load"))))
                          "znver1-direct,znver1-load,znver1-ieu2*9")
 
+(define_insn_reservation "znver4_idiv_DI" 18
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "DI")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-ieu0*18")
+
+(define_insn_reservation "znver4_idiv_SI" 12
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "SI")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-ieu0*12")
+
+(define_insn_reservation "znver4_idiv_HI" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "HI")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-ieu0*10")
+
+(define_insn_reservation "znver4_idiv_QI" 9
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "QI")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-ieu0*9")
+
+;; znver4 integer divide (type idiv) with memory attr "load", one entry per
+;; operand mode.  Each reservation places znver4-load between the first step
+;; and the znver1-ieu0 repeat; default latencies are DI 22, SI 16, HI 14,
+;; QI 13, i.e. 4 more than the znver1-ieu0 repeat count of the same entry.
+(define_insn_reservation "znver4_idiv_mem_DI" 22
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "DI")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-ieu0*18")
+
+(define_insn_reservation "znver4_idiv_mem_SI" 16
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "SI")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-ieu0*12")
+
+(define_insn_reservation "znver4_idiv_mem_HI" 14
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "HI")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-ieu0*10")
+
+(define_insn_reservation "znver4_idiv_mem_QI" 13
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "idiv")
+                                  (and (eq_attr "mode" "QI")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-ieu0*9")
+
 ;; STR ISHIFT which are micro coded.
 ;; Fix me: Latency need to be rechecked.
 (define_insn_reservation "znver1_str_ishift" 6
                         "znver1-vector,znver1-ivector")
 
 (define_insn_reservation "znver2_str_ishift" 3
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "type" "ishift")
                                   (eq_attr "memory" "both,store")))
-                        "znver1-vector,znver1-ivector")
+                        "znver1-vector,znver2-ivector")
 (define_insn_reservation "znver2_str_istr" 19
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "type" "str")
                                   (eq_attr "memory" "both,store")))
-                        "znver1-vector,znver1-ivector")
+                        "znver1-vector,znver2-ivector")
 
 ;; MOV - integer moves
 (define_insn_reservation "znver1_load_imov_double" 2
                                        (eq_attr "memory" "none"))))
                         "znver1-double,znver1-ieu|znver1-ieu")
 
+;; znver4: imovx with znver1_decode "double" and memory attr "none";
+;; default latency 1.  After znver1-double the insn takes either znver1-ieu0
+;; or znver1-ieu3 ("|" binds tighter than "," in a reservation regexp).
+(define_insn_reservation "znver4_load_imov_double" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "znver1_decode" "double")
+                                  (and (eq_attr "type" "imovx")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-ieu0|znver1-ieu3")
+
 (define_insn_reservation "znver1_load_imov_direct" 1
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "type" "imov,imovx")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-ieu")
                         "znver1-double,znver1-ieu|znver1-ieu,znver1-store")
 
 (define_insn_reservation "znver2_load_imov_double_store" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "double")
                                   (and (eq_attr "type" "imovx")
                                        (eq_attr "memory" "store"))))
                                   "znver1-direct,znver1-ieu,znver1-store")
 
 (define_insn_reservation "znver2_load_imov_direct_store" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "type" "imov,imovx")
                                   (eq_attr "memory" "store")))
                                   "znver1-direct,znver1-ieu,znver2-store")
                                        (eq_attr "memory" "load"))))
                         "znver1-double,znver1-load,znver1-ieu|znver1-ieu")
 
+;; znver4: imovx with znver1_decode "double" and memory attr "load";
+;; default latency 4.  Reserves znver1-double, znver4-load and znver1-ieu on
+;; three successive cycles.
+(define_insn_reservation "znver4_load_imov_double_load" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "znver1_decode" "double")
+                                  (and (eq_attr "type" "imovx")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-ieu")
+
 (define_insn_reservation "znver1_load_imov_direct_load" 4
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "imov,imovx")
                                   (eq_attr "memory" "none,unknown")))
                         "znver1-direct,znver1-ieu")
 
+;; znver4 simple integer insns without a load or store, default latency 1,
+;; split by the execution alternatives they name after znver1-direct:
+;;   insn_1  alu,icmp,negnot,test,incdec      -> znver1-ieu
+;;   insn_2  rotate,rotate1,ishift,ishift1    -> znver1-ieu1 or znver1-ieu2
+;;   insn_3  setcc,icmov                      -> znver1-ieu0 or znver1-ieu3
+;; insn_1 and insn_2 accept memory "none" or "unknown"; insn_3 only "none".
+(define_insn_reservation "znver4_insn_1" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+                                  (eq_attr "memory" "none,unknown")))
+                        "znver1-direct,znver1-ieu")
+
+(define_insn_reservation "znver4_insn_2" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+                                  (eq_attr "memory" "none,unknown")))
+                        "znver1-direct,znver1-ieu1|znver1-ieu2")
+
+(define_insn_reservation "znver4_insn_3" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "setcc,icmov")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-ieu0|znver1-ieu3")
+
 (define_insn_reservation "znver1_insn_load" 5
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-ieu")
 
+;; memory "load" forms of the znver4 simple integer insns: default latency 5,
+;; with znver4-load reserved one cycle before the execution alternatives.
+;; NOTE(review): insn_3_load starts with znver1-double while insn_1_load and
+;; insn_2_load start with znver1-direct -- confirm this is intended.
+(define_insn_reservation "znver4_insn_1_load" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-ieu")
+
+(define_insn_reservation "znver4_insn_2_load" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2")
+
+(define_insn_reservation "znver4_insn_3_load" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "setcc,icmov")
+                                  (eq_attr "memory" "load")))
+                        "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3")
+
 (define_insn_reservation "znver1_insn_store" 1
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
                                   (eq_attr "memory" "store")))
                         "znver1-direct,znver1-ieu,znver2-store")
 
+;; memory "store" forms of the znver4 simple integer insns: default latency 1,
+;; with znver2-store reserved one cycle after the execution alternatives.
+;; NOTE(review): insn_3_store starts with znver1-double while insn_1_store
+;; and insn_2_store start with znver1-direct -- confirm this is intended.
+(define_insn_reservation "znver4_insn_1_store" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+                                  (eq_attr "memory" "store")))
+                        "znver1-direct,znver1-ieu,znver2-store")
+
+(define_insn_reservation "znver4_insn_2_store" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+                                  (eq_attr "memory" "store")))
+                        "znver1-direct,znver1-ieu1|znver1-ieu2,znver2-store")
+
+(define_insn_reservation "znver4_insn_3_store" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "setcc,icmov")
+                                  (eq_attr "memory" "store")))
+                        "znver1-double,znver1-ieu0|znver1-ieu3,znver2-store")
+
 (define_insn_reservation "znver1_insn_both" 5
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
                                   (eq_attr "memory" "both")))
                         "znver1-direct,znver1-load,znver1-ieu,znver2-store")
 
+;; memory "both" forms of the znver4 simple integer insns: default latency 5.
+;; Each reservation runs znver4-load, then the execution alternatives, then
+;; znver2-store on successive cycles.
+;; NOTE(review): insn_3_both starts with znver1-double while insn_1_both and
+;; insn_2_both start with znver1-direct -- confirm this is intended.
+(define_insn_reservation "znver4_insn_1_both" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "alu,icmp,negnot,test,incdec")
+                                  (eq_attr "memory" "both")))
+                        "znver1-direct,znver4-load,znver1-ieu,znver2-store")
+
+(define_insn_reservation "znver4_insn_2_both" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "rotate,rotate1,ishift,ishift1")
+                                  (eq_attr "memory" "both")))
+                        "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2,znver2-store")
+
+(define_insn_reservation "znver4_insn_3_both" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "setcc,icmov")
+                                  (eq_attr "memory" "both")))
+                        "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3,znver2-store")
+
 ;; Fix me: Other vector type insns keeping latency 6 as of now.
 (define_insn_reservation "znver1_ieu_vector" 6
                         (and (eq_attr "cpu" "znver1")
                         "znver1-vector,znver1-ivector")
 
 (define_insn_reservation "znver2_ieu_vector" 5
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (eq_attr "type" "other,str,multi"))
                         "znver1-vector,znver2-ivector")
 
                         "znver1-vector,znver1-ivector")
 
 (define_insn_reservation "znver2_alu1_vector" 3
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "vector")
                                   (and (eq_attr "type" "alu1")
                                        (eq_attr "memory" "none,unknown"))))
                         "znver1-vector,znver2-ivector")
 
 (define_insn_reservation "znver1_alu1_double" 2
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "double")
                                   (and (eq_attr "type" "alu1")
                                        (eq_attr "memory" "none,unknown"))))
                         "znver1-double,znver1-ieu")
 
 (define_insn_reservation "znver1_alu1_direct" 1
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "direct")
                                   (and (eq_attr "type" "alu1")
                                        (eq_attr "memory" "none,unknown"))))
                              (and (eq_attr "type" "ibr")
                                        (eq_attr "memory" "none")))
                          "znver1-direct")
+;; znver4: ibr with memory attr "none"; default latency 1.  After
+;; znver1-direct the insn takes either znver1-ieu0 or znver4-bru0.
+(define_insn_reservation "znver4_branch" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ibr")
+                                       (eq_attr "memory" "none")))
+                         "znver1-direct,znver1-ieu0|znver4-bru0")
 
 ;; Indirect branches check latencies.
 (define_insn_reservation "znver1_indirect_branch_mem" 6
                                        (eq_attr "memory" "load")))
                         "znver1-vector,znver2-ivector")
 
+;; znver4: ibr with memory attr "load"; default latency 6.  After
+;; znver1-vector, znver2-ivector and znver4-bru0 are reserved together in the
+;; same cycle ("+").
+(define_insn_reservation "znver4_indirect_branch_mem" 6
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ibr")
+                                       (eq_attr "memory" "load")))
+                        "znver1-vector,znver2-ivector+znver4-bru0")
+
 ;; LEA executes in ALU units with 1 cycle latency.
 (define_insn_reservation "znver1_lea" 1
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (eq_attr "type" "lea"))
                         "znver1-direct,znver1-ieu")
 
-;; Other integer instrucions
+;; Other integer instructions
 (define_insn_reservation "znver1_idirect" 1
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "unit" "integer,unknown")
                                   (eq_attr "memory" "none,unknown")))
                         "znver1-direct,znver1-ieu")
 
 ;;  Floating point
 (define_insn_reservation "znver1_fp_cmov" 6
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1")
                              (eq_attr "type" "fcmov"))
                         "znver1-vector,znver1-fvector")
 
+;; fcmov for znver2, znver3 and znver4: default latency 6, reserving
+;; znver1-vector and then znver2-fvector.
+(define_insn_reservation "znver2_fp_cmov" 6
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
+                             (eq_attr "type" "fcmov"))
+                        "znver1-vector,znver2-fvector")
+
 (define_insn_reservation "znver1_fp_mov_direct_load" 8 
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "znver1_decode" "direct")
                                        (eq_attr "memory" "load"))))
                         "znver1-direct,znver1-load,znver1-fp3|znver1-fp1")
 
+;; znver4: fmov with znver1_decode "direct" and memory attr "load"; default
+;; latency 8.  znver1-direct, then znver4-load, then znver1-fp2 or znver1-fp3.
+(define_insn_reservation "znver4_fp_mov_direct_load" 8 
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "znver1_decode" "direct")
+                                  (and (eq_attr "type" "fmov")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
+
 (define_insn_reservation "znver1_fp_mov_direct_store" 5
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "znver1_decode" "direct")
                                        (eq_attr "memory" "store"))))
                         "znver1-direct,znver1-fp2|znver1-fp3,znver1-store")
 (define_insn_reservation "znver2_fp_mov_direct_store" 5
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "direct")
                                   (and (eq_attr "type" "fmov")
                                        (eq_attr "memory" "store"))))
                                        (eq_attr "memory" "none"))))
                         "znver1-double,znver1-fp3")
 
+;; znver4: fmov with znver1_decode "double" and memory attr "none"; default
+;; latency 4.  znver1-double, then znver1-fp1.
+(define_insn_reservation "znver4_fp_mov_double" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "znver1_decode" "double")
+                                  (and (eq_attr "type" "fmov")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-fp1")
+
 (define_insn_reservation "znver1_fp_mov_double_load" 12
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "znver1_decode" "double")
                                        (eq_attr "memory" "load"))))
                         "znver1-double,znver1-load,znver1-fp3")
 
+;; znver4: fmov with znver1_decode "double" and memory attr "load"; default
+;; latency 11.  znver1-double, then znver4-load, then znver1-fp1.
+(define_insn_reservation "znver4_fp_mov_double_load" 11
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "znver1_decode" "double")
+                                  (and (eq_attr "type" "fmov")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-fp1")
+
 (define_insn_reservation "znver1_fp_mov_direct" 1
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (eq_attr "type" "fmov"))
                         "znver1-direct,znver1-fp3")
 
+;; znver4: any fmov (the condition tests neither znver1_decode nor memory);
+;; default latency 1.  znver1-direct, then znver1-fp1.
+(define_insn_reservation "znver4_fp_mov_direct" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (eq_attr "type" "fmov"))
+                        "znver1-direct,znver1-fp1")
+
 ;; TODO: AGU?
 (define_insn_reservation "znver1_fp_spc_direct" 5
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                                   (eq_attr "memory" "store")))
                         "znver1-direct,znver1-fp3,znver1-fp2")
 
+;; znver4 fpspc insns.  With memory attr "store": default latency 5,
+;; reserving znver1-direct, znver1-fp1 and znver4-fp-store0 in turn.  With
+;; memory attr "none": default latency 22, reserving znver1-direct and then
+;; znver1-fp1 for a single cycle.
+;; NOTE(review): the second entry is named "sqrt" but its condition matches
+;; every fpspc insn without a memory operand -- confirm.
+(define_insn_reservation "znver4_fp_spc_direct" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fpspc")
+                                  (eq_attr "memory" "store")))
+                        "znver1-direct,znver1-fp1,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_sqrt_direct" 22
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fpspc")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp1")
+
 (define_insn_reservation "znver1_fp_insn_vector" 6
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "znver1_decode" "vector")
                                   (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
                         "znver1-vector,znver1-fvector")
 (define_insn_reservation "znver2_fp_insn_vector" 6
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "znver1_decode" "vector")
                                   (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
                         "znver1-vector,znver2-fvector")
                              (eq_attr "type" "fsgn"))
                         "znver1-direct,znver1-fp3")
 
+;; znver4: fsgn; default latency 1.  znver1-direct, then znver1-fp0 or
+;; znver1-fp1.
+(define_insn_reservation "znver4_fp_fsgn" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (eq_attr "type" "fsgn"))
+                        "znver1-direct,znver1-fp0|znver1-fp1")
+
 (define_insn_reservation "znver1_fp_fcmp" 2
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "memory" "none")
                                        (eq_attr "type" "fcmp"))))
                         "znver1-double,znver1-fp0,znver1-fp2")
 
+;; znver4 fcmp with memory attr "none".  The znver1_decode "double" form
+;; (default latency 4; znver1-double, znver1-fp0, znver4-fp-store0) is listed
+;; ahead of the general form (default latency 3; znver1-direct, znver1-fp0),
+;; whose condition does not test znver1_decode.
+(define_insn_reservation "znver4_fp_fcmp_double" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "memory" "none")
+                                  (and (eq_attr "znver1_decode" "double")
+                                       (eq_attr "type" "fcmp"))))
+                        "znver1-double,znver1-fp0,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_fcmp" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fcmp")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp0")
+
 (define_insn_reservation "znver1_fp_fcmp_load" 9
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
-                             (and (eq_attr "memory" "none")
+                             (and (eq_attr "memory" "load")
                                   (and (eq_attr "znver1_decode" "double")
                                        (eq_attr "type" "fcmp"))))
                         "znver1-double,znver1-load, znver1-fp0,znver1-fp2")
 
+;; znver4 fcmp with memory attr "load".  The znver1_decode "double" form
+;; (default latency 11) is listed ahead of the general form (default latency
+;; 10), whose condition does not test znver1_decode.  Both reserve
+;; znver4-load one cycle before znver1-fp0.
+(define_insn_reservation "znver4_fp_fcmp_double_load" 11
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "memory" "load")
+                                  (and (eq_attr "znver1_decode" "double")
+                                       (eq_attr "type" "fcmp"))))
+                        "znver1-double,znver4-load,znver1-fp0,znver4-fp-store0")
+
+(define_insn_reservation "znver4_fp_fcmp_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fcmp")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp0")
+
 ;;FADD FSUB FMUL
 (define_insn_reservation "znver1_fp_op_mul" 5
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0*5")
 
+;; znver4: fop/fmul with memory attr "none"; default latency 6.  After
+;; znver1-direct, znver1-fp0 is held for 6 consecutive cycles.
+(define_insn_reservation "znver4_fp_op_mul" 6
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fop,fmul")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp0*6")
+
 (define_insn_reservation "znver1_fp_op_mul_load" 12 
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "fop,fmul")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0*5")
 
+;; znver4: fop/fmul with memory attr "load"; default latency 13.
+;; znver1-direct, then znver4-load, then znver1-fp0 for 6 consecutive cycles.
+(define_insn_reservation "znver4_fp_op_mul_load" 13 
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fop,fmul")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp0*6")
+
+;; znver4: fop/fmul with fp_int_src "true" and memory attr "none"; default
+;; latency 10.  znver1-double, then znver1-fp1, then znver1-fp0.
+(define_insn_reservation "znver4_fp_op_imul" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fop,fmul")
+                                  (and (eq_attr "fp_int_src" "true")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-fp1,znver1-fp0")
+
 (define_insn_reservation "znver1_fp_op_imul_load" 16
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "fop,fmul")
                                        (eq_attr "memory" "load"))))
                        "znver1-double,znver1-load,znver1-fp3,znver1-fp0")
 
+;; znver4: fop/fmul with fp_int_src "true" and memory attr "load"; default
+;; latency 17.  znver1-double, znver4-load, znver1-fp1, then znver1-fp0.
+(define_insn_reservation "znver4_fp_op_imul_load" 17
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fop,fmul")
+                                  (and (eq_attr "fp_int_src" "true")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-fp1,znver1-fp0")
+
 (define_insn_reservation "znver1_fp_op_div" 15
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "type" "fdiv")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp3*15")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp3*15")
 
+;; znver4: fdiv with memory attr "load"; default latency 22.  znver1-direct,
+;; then znver4-load, then znver1-fp3 for 15 consecutive cycles.  The
+;; condition does not test fp_int_src.
+(define_insn_reservation "znver4_fp_op_div_load" 22
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fdiv")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp3*15")
+
 (define_insn_reservation "znver1_fp_op_idiv_load" 27
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "type" "fdiv")
                                        (eq_attr "memory" "load"))))
                         "znver1-double,znver1-load,znver1-fp3*19")
 
+;; znver4: fdiv with fp_int_src "true" and memory attr "none"; default
+;; latency 19.  znver1-double, then znver1-fp1 on each of the next two cycles.
+(define_insn_reservation "znver4_fp_op_idiv" 19
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fdiv")
+                                  (and (eq_attr "fp_int_src" "true")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-fp1,znver1-fp1")
+
+;; znver4: fdiv with fp_int_src "true" and memory attr "load"; default
+;; latency 26.  znver1-double, then znver4-load, then znver1-fp1 on each of
+;; the next two cycles.  The memory test must be "load": with "none" the
+;; condition is identical to znver4_fp_op_idiv and the znver4-load step
+;; would describe an insn that has no memory operand.
+(define_insn_reservation "znver4_fp_op_idiv_load" 26
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "fdiv")
+                                  (and (eq_attr "fp_int_src" "true")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-fp1,znver1-fp1")
 
 ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
 (define_insn_reservation "znver1_fp_insn" 1
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
 
+;; znver4: mmx and mmxadd insns; default latency 1.  znver1-direct, then
+;; znver1-fpu.
+;; NOTE(review): the condition has no memory test, so it also matches
+;; mmx/mmxadd insns that have a memory operand -- confirm whether
+;; (eq_attr "memory" "none") was intended here.
+(define_insn_reservation "znver4_fp_insn" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (eq_attr "type" "mmx,mmxadd"))
+                        "znver1-direct,znver1-fpu")
+
 (define_insn_reservation "znver1_mmx_add_load" 8
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxadd")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
 
+;; znver4: mmxadd with memory attr "load"; default latency 8.  znver1-direct,
+;; then znver4-load, then znver1-fpu.  Keyed on cpu "znver4": selecting
+;; "znver1,znver2,znver3" here would only repeat the condition of
+;; znver1_mmx_add_load and never apply to the CPU this entry is named for.
+(define_insn_reservation "znver4_mmx_add_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxadd")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fpu")
+
 (define_insn_reservation "znver1_mmx_cmp" 1
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxcmp")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0|znver1-fp3")
 
+;; znver4: mmxcmp with memory attr "none"; default latency 1.
+;; znver1-direct, then znver1-fpu.
+(define_insn_reservation "znver4_mmx_cmp" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxcmp")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fpu")
+
 (define_insn_reservation "znver1_mmx_cmp_load" 8
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxcmp")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
 
+;; znver4: mmxcmp with memory attr "load"; default latency 8.
+;; znver1-direct, then znver4-load, then znver1-fpu.
+(define_insn_reservation "znver4_mmx_cmp_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxcmp")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fpu")
+
 (define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1
-                        (and (eq_attr "cpu" "znver1,znver2,znver3")
+                        (and (eq_attr "cpu" "znver1,znver2,znver3,znver4")
                              (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp1|znver1-fp2")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
 
+;; znver4: mmxcvt/sseshuf/sseshuf1 with memory attr "load"; default latency
+;; 8.  znver1-direct, then znver4-load, then znver1-fp1 or znver1-fp2.
+(define_insn_reservation "znver4_mmx_cvt_pck_shuf_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
 (define_insn_reservation "znver1_mmx_shift_move" 1
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxshft,mmxmov")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp2")
 
+;; znver4 mmxshft and mmxmov with memory attr "none", default latency 1,
+;; given separate entries because they name different units after
+;; znver1-direct: mmxshft takes znver1-fp1 or znver1-fp2, mmxmov takes
+;; znver4-fp-store0.
+(define_insn_reservation "znver4_mmx_shift" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxshft")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp1|znver1-fp2")
+
+(define_insn_reservation "znver4_mmx_move" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxmov")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver4-fp-store0")
+
 (define_insn_reservation "znver1_mmx_shift_move_load" 8
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxshft,mmxmov")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp2")
 
+;; znver4 mmxshft and mmxmov with memory attr "load", default latency 8.
+;; Both reserve znver4-load after znver1-direct; mmxshft then takes
+;; znver1-fp1 or znver1-fp2, mmxmov then takes znver4-fp-store0.
+(define_insn_reservation "znver4_mmx_shift_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxshft")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
+(define_insn_reservation "znver4_mmx_move_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxmov")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver4-fp-store0")
+
 (define_insn_reservation "znver1_mmx_move_store" 1
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "type" "mmxshft,mmxmov")
                                   (eq_attr "memory" "store,both")))
                          "znver1-direct,znver1-fp2,znver2-store")
 
+;; znver4 mmxshft and mmxmov with memory attr "store" or "both", default
+;; latency 1.  The mmxshft entry ends with a znver2-store step; the mmxmov
+;; entry reserves only znver4-fp-store0 after znver1-direct and names no
+;; znver2-store step.
+(define_insn_reservation "znver4_mmx_shift_store" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxshft")
+                                  (eq_attr "memory" "store,both")))
+                        "znver1-direct,znver1-fp1|znver1-fp2,znver2-store")
+
+(define_insn_reservation "znver4_mmx_move_store" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxmov")
+                                  (eq_attr "memory" "store,both")))
+                        "znver1-direct,znver4-fp-store0")
+
 (define_insn_reservation "znver1_mmx_mul" 3
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxmul")
                                   (eq_attr "memory" "none")))
                          "znver1-direct,znver1-fp0*3")
 
+;; znver4: mmxmul with memory attr "none"; default latency 3.  After
+;; znver1-direct, three consecutive cycles each reserve znver1-fp0 or
+;; znver1-fp3.
+(define_insn_reservation "znver4_mmx_mul" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxmul")
+                                  (eq_attr "memory" "none")))
+                         "znver1-direct,(znver1-fp0|znver1-fp3)*3")
+
 (define_insn_reservation "znver1_mmx_load" 10
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxmul")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0*3")
 
+;; znver4: mmxmul with memory attr "load"; default latency 10.
+;; znver1-direct, then znver4-load, then three consecutive cycles each
+;; reserving znver1-fp0 or znver1-fp3.
+(define_insn_reservation "znver4_mmx_mul_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "mmxmul")
+                                  (eq_attr "memory" "load")))
+                         "znver1-direct,znver4-load,(znver1-fp0|znver1-fp3)*3")
+
 ;; TODO
 (define_insn_reservation "znver1_avx256_log" 1
                         (and (eq_attr "cpu" "znver1")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fpu")
 
+(define_insn_reservation "znver4_sse_log" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fpu")
+
+(define_insn_reservation "znver4_sse_log_evex" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "V16SF,V8DF")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_log_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fpu")
+
+(define_insn_reservation "znver4_sse_log_evex_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "V16SF,V8DF")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "OI")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_evex" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "TI")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "OI")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3")
+
+(define_insn_reservation "znver4_sse_ilog_evex_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "sselog,sselog1")
+                                  (and (eq_attr "mode" "TI")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3")
+
 (define_insn_reservation "znver1_sse_log_load" 8
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "sselog")
                                             (eq_attr "memory" "none")))))
                         "znver1-double,znver1-fp0|znver1-fp1")
 
+(define_insn_reservation "znver4_sse_comi" 1
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssecomi")
+                                  (eq_attr "memory" "none")))
+                        "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_sse_comi_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssecomi")
+                                  (eq_attr "memory" "load")))
+                        "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
 (define_insn_reservation "znver1_sse_comi_double_load" 10
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "V4SF,V2DF,TI"))
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
                                   (ior (eq_attr "cpu" "znver2")
-                                       (eq_attr "cpu" "znver3")))
+                                       (eq_attr "cpu" "znver3,znver4")))
                              (and (eq_attr "prefix_extra" "1")
                                   (and (eq_attr "type" "ssecomi")
                                        (eq_attr "memory" "none"))))
                                        (eq_attr "memory" "load"))))
                         "znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
 
+(define_insn_reservation "znver4_sse_test_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "prefix_extra" "1")
+                                  (and (eq_attr "type" "ssecomi")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp1|znver1-fp2")
+
 ;; SSE moves
 ;; Fix me:  Need to revist this again some of the moves may be restricted
 ;; to some fpu pipes.
                         "znver1-direct,znver1-ieu0")
 
 (define_insn_reservation "znver2_sse_mov" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "mode" "SI")
                                   (and (eq_attr "isa" "avx")
                                        (and (eq_attr "type" "ssemov")
                         "znver1-direct,znver1-ieu2")
 
 (define_insn_reservation "znver2_avx_mov" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "mode" "TI")
                                   (and (eq_attr "isa" "avx")
                                        (and (eq_attr "type" "ssemov")
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
                                   (ior (eq_attr "cpu" "znver2")
-                                             (eq_attr "cpu" "znver3")))
+                                       (ior (eq_attr "cpu" "znver3")
+                                        (eq_attr "cpu" "znver4"))))
                              (and (eq_attr "type" "ssemov")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fpu")
                                        (eq_attr "memory" "store"))))
                        "znver1-direct,znver1-fpu,znver1-store")
 (define_insn_reservation "znver2_sseavx_mov_store" 1
-                        (and (eq_attr "cpu" "znver2,znver3")
+                        (and (eq_attr "cpu" "znver2,znver3,znver4")
                              (and (eq_attr "type" "ssemov")
                                   (eq_attr "memory" "store")))
                        "znver1-direct,znver1-fpu,znver2-store")
                                    (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fpu")
 
+(define_insn_reservation "znver4_sseavx_mov_load" 8
+                        (and (eq_attr "cpu" "znver4")
+                                  (and (eq_attr "type" "ssemov")
+                                       (eq_attr "memory" "load")))
+                        "znver1-double,znver4-load,znver1-fpu")
+
 (define_insn_reservation "znver1_avx256_mov" 1
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF,V4DF,OI")
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
                                   (ior (eq_attr "cpu" "znver2")
-                                       (eq_attr "cpu" "znver3")))
+                                   (ior (eq_attr "cpu" "znver3")
+                                        (eq_attr "cpu" "znver4"))))
                              (and (eq_attr "type" "sseadd")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp2|znver1-fp3")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp2|znver1-fp3")
 
+(define_insn_reservation "znver4_sseavx_add_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                                  (and (eq_attr "type" "sseadd")
+                                       (eq_attr "memory" "load")))
+                        "znver1-double,znver4-load,znver1-fp2|znver1-fp3")
+
 (define_insn_reservation "znver1_avx256_add" 3
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF,V4DF,OI")
                                        (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0|znver1-fp1")
 
+(define_insn_reservation "znver4_sseavx_fma" 4
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+                                  (and (eq_attr "type" "ssemuladd")
+                                       (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseavx_fma_evex" 4
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "V16SF,V8DF"))
+                                  (and (eq_attr "type" "ssemuladd")
+                                       (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp0+znver1-fp1")
+
 (define_insn_reservation "znver3_sseavx_fma_load" 11
                         (and (and (eq_attr "cpu" "znver3")
                               (eq_attr "mode" "SF,DF,V4SF,V2DF"))
                                          (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
 
+(define_insn_reservation "znver4_sseavx_fma_load" 11
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
+                                  (and (eq_attr "type" "ssemuladd")
+                                       (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseavx_fma_evex_load" 11
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "V16SF,V8DF"))
+                                  (and (eq_attr "type" "ssemuladd")
+                                       (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp0+znver1-fp1")
+
 (define_insn_reservation "znver3_avx256_fma" 4
                         (and (eq_attr "cpu" "znver3")
                              (and (eq_attr "mode" "V8SF,V4DF")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
 
+(define_insn_reservation "znver4_sseavx_iadd" 1
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI"))
+                                  (and (eq_attr "type" "sseiadd")
+                                       (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fpu")
+
+(define_insn_reservation "znver4_sseavx_iadd_load" 8
+                        (and (and (eq_attr "cpu" "znver4")
+                             (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI"))
+                                  (and (eq_attr "type" "sseiadd")
+                                       (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fpu")
+
 (define_insn_reservation "znver1_sseavx_iadd_load" 8
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "DI,TI"))
                                             (eq_attr "memory" "load")))))
                         "znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
 
+(define_insn_reservation "znver4_ssecvtsfdf_si" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "SI")
+                                  (and (eq_attr "type" "sseicvt")
+                                           (eq_attr "memory" "none"))))
+                        "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_ssecvtsfdf_si_load" 11
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "SI")
+                                  (and (eq_attr "type" "sseicvt")
+                                           (eq_attr "memory" "load"))))
+                        "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0")
+
+(define_insn_reservation "znver4_ssecvtsfdf_di" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "DI")
+                                  (and (eq_attr "type" "sseicvt")
+                                           (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp2|znver1-fp3")
+
+(define_insn_reservation "znver4_ssecvtsfdf_di_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "DI")
+                                  (and (eq_attr "type" "sseicvt")
+                                           (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
 
 ;; All other used ssecvt fp3 pipes
 ;; Check: Need to revisit this again.
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp3")
 
+(define_insn_reservation "znver4_ssecvt" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssecvt")
+                                  (eq_attr "memory" "none")))
+                        "znver1-direct,znver1-fp2|znver1-fp3")
+
 (define_insn_reservation "znver1_ssecvt_load" 11
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "ssecvt")
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp3")
 
+(define_insn_reservation "znver4_ssecvt_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssecvt")
+                                  (eq_attr "memory" "load")))
+                        "znver1-direct,znver4-load,znver1-fp2|znver1-fp3")
+
 ;; SSE div
 (define_insn_reservation "znver1_ssediv_ss_ps" 10
                         (and (ior (and (eq_attr "cpu" "znver1")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp3*10")
 
+(define_insn_reservation "znver4_ssediv_ss_ps" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp3*10")
+
+(define_insn_reservation "znver4_ssediv_ss_ps_evex" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+                                   (and (eq_attr "prefix" "evex")
+                                    (eq_attr "memory" "none")))))
+                        "znver1-direct,znver1-fp1*10")
+
 (define_insn_reservation "znver1_ssediv_ss_ps_load" 17
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "V4SF,SF"))
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp3*10")
 
+(define_insn_reservation "znver4_ssediv_ss_ps_load" 17
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp3*10")
+
+(define_insn_reservation "znver4_ssediv_ss_ps_evex_load" 17
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF")
+                                   (and (eq_attr "prefix" "evex")
+                                    (eq_attr "memory" "load")))))
+                        "znver1-direct,znver4-load,znver1-fp1*10")
+
 (define_insn_reservation "znver1_ssediv_sd_pd" 13
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "V2DF,DF"))
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp3*13")
 
+(define_insn_reservation "znver4_ssediv_sd_pd" 13
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+                                   (eq_attr "memory" "none"))))
+                        "znver1-direct,znver1-fp3*13")
+
+(define_insn_reservation "znver4_ssediv_sd_pd_evex" 13
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+                                   (and (eq_attr "prefix" "evex")
+                                    (eq_attr "memory" "none")))))
+                        "znver1-direct,znver1-fp1*13")
+
 (define_insn_reservation "znver1_ssediv_sd_pd_load" 20
                         (and (ior (and (eq_attr "cpu" "znver1")
                                               (eq_attr "mode" "V2DF,DF"))
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp3*13")
 
+(define_insn_reservation "znver4_ssediv_sd_pd_load" 20
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+                                   (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,znver1-fp3*13")
+
+(define_insn_reservation "znver4_ssediv_sd_pd_evex_load" 20
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssediv")
+                                  (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF")
+                                   (and (eq_attr "prefix" "evex")
+                                    (eq_attr "memory" "load")))))
+                        "znver1-direct,znver4-load,znver1-fp1*13")
+
 (define_insn_reservation "znver1_ssediv_avx256_ps" 12
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF")
                                        (eq_attr "mode" "V4SF,SF"))
                                   (and (eq_attr "cpu" "znver2")
                                              (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))
-                                  (and (eq_attr "cpu" "znver3")
+                                  (and (eq_attr "cpu" "znver3,znver4")
                                              (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
                              (and (eq_attr "type" "ssemul")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,(znver1-fp0|znver1-fp1)*3")
 
+(define_insn_reservation "znver4_ssemul_ss_ps_evex" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssemul")
+                                  (and (eq_attr "mode" "V8DF,V16SF")
+                                       (eq_attr "memory" "none"))))
+                        "znver1-direct,(znver1-fp0+znver1-fp1)*3")
+
 (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "V4SF,SF"))
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
 
+(define_insn_reservation "znver4_ssemul_ss_ps_evex_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "type" "ssemul")
+                                  (and (eq_attr "mode" "V8DF,V16SF")
+                                       (eq_attr "memory" "load"))))
+                        "znver1-direct,znver4-load,(znver1-fp0+znver1-fp1)*3")
+
 (define_insn_reservation "znver1_ssemul_avx256_ps" 3
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF")
                                        (eq_attr "mode" "TI"))
                                   (and (eq_attr "cpu" "znver2")
                                              (eq_attr "mode" "TI,OI"))
-                                  (and (eq_attr "cpu" "znver3")
+                                  (and (eq_attr "cpu" "znver3,znver4")
                                              (eq_attr "mode" "TI,OI")))
                              (and (eq_attr "type" "sseimul")
                                   (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0*3")
 
+(define_insn_reservation "znver4_sseimul" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "TI,OI")
+                                  (and (eq_attr "type" "sseimul")
+                                   (and (eq_attr "prefix" "evex")
+                                        (eq_attr "memory" "none")))))
+                        "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_evex" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "XI")
+                                  (and (eq_attr "type" "sseimul")
+                                   (and (eq_attr "prefix" "evex")
+                                        (eq_attr "memory" "none")))))
+                        "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "TI,OI")
+                                  (and (eq_attr "type" "sseimul")
+                                   (and (eq_attr "prefix" "evex")
+                                        (eq_attr "memory" "load")))))
+                        "znver1-direct,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sseimul_evex_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "XI")
+                                  (and (eq_attr "type" "sseimul")
+                                   (and (eq_attr "prefix" "evex")
+                                        (eq_attr "memory" "load")))))
+                        "znver1-direct,znver4-load,znver1-fp0+znver1-fp1")
+
 (define_insn_reservation "znver1_sseimul_avx256" 4
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "mode" "OI")
                                        (eq_attr "mode" "SF,DF,V4SF,V2DF"))
                                   (and (eq_attr "cpu" "znver2")
                                              (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
-                                  (and (eq_attr "cpu" "znver3")
+                                  (and (eq_attr "cpu" "znver3,znver4")
                                              (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
                               (and (eq_attr "type" "ssecmp")
                                    (eq_attr "memory" "none")))
                         "znver1-direct,znver1-fp0|znver1-fp1")
 
+(define_insn_reservation "znver4_sse_cmp" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_vex" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "V8SF,V4DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_vex_load" 11
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "V8SF,V4DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_evex" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "V16SF,V8DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_evex_load" 12
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "V16SF,V8DF")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0+znver1-fp1")
+
 (define_insn_reservation "znver1_sse_cmp_load" 8
                         (and (ior (and (eq_attr "cpu" "znver1")
                                         (eq_attr "mode" "SF,DF,V4SF,V2DF"))
                                               (eq_attr "mode" "QI,HI,SI,DI,TI"))
                                   (and (eq_attr "cpu" "znver2")
                                              (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
-                                  (and (eq_attr "cpu" "znver3")
+                                  (and (eq_attr "cpu" "znver3,znver4")
                                              (eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
                              (and (eq_attr "type" "ssecmp")
                                   (eq_attr "memory" "none")))
                                   (eq_attr "memory" "load")))
                         "znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
 
+(define_insn_reservation "znver4_sse_icmp" 3
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "QI,HI,SI,DI,TI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_load" 10
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "QI,HI,SI,DI,TI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_vex" 4
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "OI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_cmp_ivex_load" 11
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "OI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0|znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_evex" 5
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "XI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "none"))))))
+                       "znver1-direct,znver1-fp0+znver1-fp1")
+
+(define_insn_reservation "znver4_sse_icmp_evex_load" 12
+                        (and (eq_attr "cpu" "znver4")
+                             (and (eq_attr "mode" "XI")
+                                  (and (eq_attr "type" "ssecmp")
+                                   (and (eq_attr "prefix" "evex")
+                                        (and (eq_attr "length_immediate" "1")
+                                         (eq_attr "memory" "load"))))))
+                       "znver1-double,znver4-load,znver1-fp0+znver1-fp1")
+
 (define_insn_reservation "znver1_sse_icmp_avx256" 1
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "OI")
index e2dbc1dbc46866ac02a7ec1b43ca5b0e9757d6be..ba90bfa6b157e84661d37dcdbee633d273bc7aac 100644 (file)
@@ -21935,6 +21935,9 @@ AMD Family 19h CPU.
 @item znver3
 AMD Family 19h Zen version 3.
 
+@item znver4
+AMD Family 19h Zen version 4.
+
 @item x86-64
 Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
 
index 4df29179bf8cabbf584fd541ed6cd269d94a7610..09548c4528c0a25db5dab398a1e08e96a361aab2 100644 (file)
@@ -32172,6 +32172,15 @@ MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
 SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
 WBNOINVD, PKU, VPCLMULQDQ, VAES, and 64-bit instruction set extensions.)
 
+@item znver4
+AMD Family 19h core based CPUs with x86-64 instruction set support.  (This
+supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED,
+MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
+SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
+WBNOINVD, PKU, VPCLMULQDQ, VAES, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD,
+AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, AVX512VNNI,
+AVX512BITALG, AVX512VPOPCNTDQ, GFNI, and 64-bit instruction set extensions.)
+
 @item btver1
 CPUs based on AMD Family 14h cores with x86-64 instruction set support.  (This
 supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit
index c7723e3168a4c1b85d7d910f3f5570ea315c8a29..a8dd8ac4803b6366294b62e8ec26c8a64a123b18 100644 (file)
@@ -49,6 +49,9 @@ int __attribute__ ((target("arch=znver3"))) foo () {
   return 9;
 }
 
+int __attribute__ ((target("arch=znver4"))) foo () { /* Version of foo built for target arch=znver4. */
+  return 10; /* Value checked by the znver4 branch in main (val == 10). */
+}
 
 int main ()
 {
@@ -72,6 +75,8 @@ int main ()
     assert (val == 8);
   else if (__builtin_cpu_is ("znver3"))
     assert (val == 9);
+  else if (__builtin_cpu_is ("znver4"))
+    assert (val == 10);
   else
     assert (val == 0);
 
index a681bffe3e7660e634e5080fa9191afdb268e881..fada66bb8bc1f8a1618a39ca406b6cc6d9836cf8 100644 (file)
@@ -204,6 +204,7 @@ extern void test_arch_bdver3 (void)         __attribute__((__target__("arch=bdver3")));
 extern void test_arch_znver1 (void)             __attribute__((__target__("arch=znver1")));
 extern void test_arch_znver2 (void)             __attribute__((__target__("arch=znver2")));
 extern void test_arch_znver3 (void)             __attribute__((__target__("arch=znver3")));
+extern void test_arch_znver4 (void)             __attribute__((__target__("arch=znver4")));
 
 extern void test_tune_nocona (void)            __attribute__((__target__("tune=nocona")));
 extern void test_tune_core2 (void)             __attribute__((__target__("tune=core2")));
@@ -227,6 +228,7 @@ extern void test_tune_generic (void)                __attribute__((__target__("tune=generic"))
 extern void test_tune_znver1 (void)             __attribute__((__target__("tune=znver1")));
 extern void test_tune_znver2 (void)             __attribute__((__target__("tune=znver2")));
 extern void test_tune_znver3 (void)             __attribute__((__target__("tune=znver3")));
+extern void test_tune_znver4 (void)             __attribute__((__target__("tune=znver4")));
 
 extern void test_fpmath_sse (void)             __attribute__((__target__("sse2,fpmath=sse")));
 extern void test_fpmath_387 (void)             __attribute__((__target__("sse2,fpmath=387")));