2, /* Small unroll factor. */
};
+/* yongfeng_cost should produce code tuned for ZHAOXIN yongfeng CPU. */
+/* String-operation strategy table referenced by yongfeng_cost for memcpy.
+   It has two entries; each names libcall first and then lists
+   {size, algorithm, flag} triples, the last of which has size -1.
+   NOTE(review): presumably entry 0 is used for 32-bit and entry 1 for
+   64-bit code, and each triple is {max size, algorithm, noalign} --
+   confirm against the stringop_algs definition. */
+static stringop_algs yongfeng_memcpy[2] = {
+ {libcall, {{6, unrolled_loop, true}, {256, unrolled_loop, false},
+ {-1, libcall, false}}},
+ {libcall, {{8, loop, false}, {512, unrolled_loop, false},
+ {-1, libcall, false}}}};
+/* String-operation strategy table referenced by yongfeng_cost for memset.
+   It has two entries; each names libcall first and then lists
+   {size, algorithm, flag} triples, the last of which has size -1.
+   NOTE(review): presumably entry 0 is used for 32-bit and entry 1 for
+   64-bit code, and each triple is {max size, algorithm, noalign} --
+   confirm against the stringop_algs definition. */
+static stringop_algs yongfeng_memset[2] = {
+ {libcall, {{6, loop_1_byte, false}, {128, loop, false},
+ {-1, libcall, false}}},
+ {libcall, {{2, rep_prefix_4_byte, false}, {64, loop, false},
+ {1024, vector_loop, false},
+ {-1, libcall, false}}}};
+/* Register-move and instruction cost table for the ZHAOXIN yongfeng CPU.
+   The string-operation strategies it references are yongfeng_memcpy and
+   yongfeng_memset. */
+static const
+struct processor_costs yongfeng_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl. */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ {8, 8, 8, 10, 15}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 10, 15}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 8, 8, /* SSE->integer and integer->SSE moves. */
+ 8, 8, /* mask->integer and integer->mask moves. */
+ {8, 8, 8}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 8}, /* cost of storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (2), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (2), /* SI. */
+ COSTS_N_INSNS (2), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit set. */
+ {COSTS_N_INSNS (8), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (9), /* HI. */
+ COSTS_N_INSNS (8), /* SI. */
+ COSTS_N_INSNS (41), /* DI. */
+ COSTS_N_INSNS (41)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 17, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO. */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer registers. */
+ {8, 8, 8, 12, 15}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ {8, 8, 8, 12, 15}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ {8, 8, 8, 12, 15}, /* cost of unaligned loads. */
+ {8, 8, 8, 12, 15}, /* cost of unaligned stores. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ 8, /* cost of moving SSE register to integer. */
+ 18, 6, /* Gather load static, per_elt. */
+ 18, 6, /* Gather store static, per_elt. */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+ 12, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (14), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ yongfeng_memcpy,
+ yongfeng_memset,
+ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:11:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
+ 4, /* Small unroll limit. */
+ 2, /* Small unroll factor. */
+};
+
+
/* Generic should produce code tuned for Core-i7 (and newer chips)
and btver1 (and newer chips). */
/* X86_TUNE_SCHEDULE: Enable scheduling. */
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
- | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
+ | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC)
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
- | m_KNL | m_KNM | m_AMD_MULTIPLE | m_LUJIAZUI | m_TREMONT
+ | m_KNL | m_KNM | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
that can be partly masked by careful scheduling of moves. */
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
- | m_BDVER | m_ZNVER | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID
+ | m_BDVER | m_ZNVER | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
"sse_partial_reg_fp_converts_dependency",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
- | m_BDVER | m_ZNVER | m_LUJIAZUI | m_CORE_HYBRID | m_CORE_ATOM
+ | m_BDVER | m_ZNVER | m_ZHAOXIN | m_CORE_HYBRID | m_CORE_ATOM
| m_GENERIC)
/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
"sse_partial_reg_converts_dependency",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
- | m_BDVER | m_ZNVER | m_LUJIAZUI | m_CORE_HYBRID | m_CORE_ATOM
+ | m_BDVER | m_ZNVER | m_ZHAOXIN | m_CORE_HYBRID | m_CORE_ATOM
| m_GENERIC)
/* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before
DEF_TUNE (X86_TUNE_MOVX, "movx",
m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
- | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
+ | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
| m_CORE_AVX2 | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
| m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
- | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
+ | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
conditional jump instruction for 32 bit TARGET. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32",
- m_CORE_ALL | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent
conditional jump instruction for TARGET_64BIT. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER
- | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ | m_ZNVER | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a
subsequent conditional jump instruction when the condition jump
check sign flag (SF) or overflow flag (OF). */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER
- | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ | m_ZNVER | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
jump instruction when the alu instruction produces the CCFLAG consumed by
the conditional jump instruction. */
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
- m_SANDYBRIDGE | m_CORE_AVX2 | m_LUJIAZUI | m_GENERIC)
+ m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC)
/*****************************************************************************/
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
- | m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_LUJIAZUI)
+ | m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_ZHAOXIN)
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
considered on critical path. */
/* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */
DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
- m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
+ m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
and push instructions. */
DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
- | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
+ | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
over esp subtraction. */
/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
on 16-bit immediate moves into memory on Core2 and Corei7. */
-DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_LUJIAZUI | m_GENERIC)
+DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such
as "add mem, reg". */
~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
- | m_LUJIAZUI | m_GENERIC))
+ | m_ZHAOXIN | m_GENERIC))
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
- | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
+ | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC))
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
- | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_LUJIAZUI)
+ | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ZHAOXIN)
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
"prefer_known_rep_movsb_stosb",
m_SKYLAKE | m_CORE_HYBRID | m_CORE_ATOM | m_TREMONT | m_CORE_AVX512
- | m_LUJIAZUI)
+ | m_ZHAOXIN)
/* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of
compact prologues and epilogues by issuing a misaligned moves. This
FIXME: This may actualy be a win on more targets than listed here. */
DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
"misaligned_move_string_pro_epilogues",
- m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_LUJIAZUI | m_TREMONT
+ m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
- | m_BTVER | m_ZNVER | m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS
+ | m_BTVER | m_ZNVER | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
- | m_LAKEMONT | m_AMD_MULTIPLE | m_LUJIAZUI | m_GOLDMONT
+ | m_LAKEMONT | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
| m_GENERIC)
DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
m_SANDYBRIDGE | m_HASWELL | m_SKYLAKE | m_SKYLAKE_AVX512
| m_CANNONLAKE | m_CASCADELAKE | m_COOPERLAKE
- | m_LUJIAZUI | m_GENERIC)
+ | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
on hardware capabilities. Bdver3 hardware has a loop buffer which makes
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
- | m_LUJIAZUI | m_GENERIC)
+ | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */
DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
- m_CORE_ALL | m_BDVER | m_ZNVER | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID
+ m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
(signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. */
DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs",
m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
- | m_GOLDMONT_PLUS | m_LUJIAZUI)
+ | m_GOLDMONT_PLUS | m_ZHAOXIN)
/*****************************************************************************/
/* 387 instruction selection tuning */
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
- | m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
+ | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC))
/* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */
-DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_LUJIAZUI)
+DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN)
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
- | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_LUJIAZUI
+ | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_ZHAOXIN
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC)
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
- | m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_LUJIAZUI
+ | m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN
| m_GENERIC)
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
- | m_CORE_ATOM | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ | m_CORE_ATOM | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC)
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
precision 128bit instructions instead of double where possible. */
/* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */
DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
- m_AMD_MULTIPLE | m_LUJIAZUI | m_CORE_ALL | m_TREMONT | m_CORE_HYBRID
+ m_AMD_MULTIPLE | m_ZHAOXIN | m_CORE_ALL | m_TREMONT | m_CORE_HYBRID
| m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
xorps/xorpd and other variants. */
DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
- | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
+ | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
to SSE registers. If disabled, the moves will be done by storing
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
- | m_CORE_ATOM | m_GENERIC | m_GDS))
+ | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
elements. */
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
- | m_CORE_ATOM | m_GENERIC | m_GDS))
+ | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
elements. */
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_CORE_HYBRID | m_CORE_ATOM
- | m_GENERIC | m_GDS))
+ | m_YONGFENG | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
elements. */
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
smaller FMA chain. */
-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
+DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3
+ | m_YONGFENG)
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
smaller FMA chain. */
--- /dev/null
+;; Copyright (C) 2012-2023 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+
+;; ZHAOXIN yongfeng processor Scheduling
+;; Modeling automatons for yongfeng decoders, integer execution pipes,
+;; FP execution pipes, AGU pipes, and dividers.
+(define_automaton "yongfeng_decoder,yongfeng_ieu,yongfeng_fp,yongfeng_agu,yongfeng_idiv,yongfeng_fdiv")
+
+;; The rules for the decoder are simple:
+;; - an instruction with 1 uop can be decoded by any of the four
+;; decoders in one cycle.
+;; - an instruction with 2 uops can be decoded by decoder 0 or decoder 1
+;; or decoder 2 but still in only one cycle.
+;; - a complex (microcode) instruction can only be decoded by
+;; decoder 0, and this takes an unspecified number of cycles.
+;;
+;; The goal is to schedule such that we have a few-one-two uops sequence
+;; in each cycle, to decode as many instructions per cycle as possible.
+(define_cpu_unit "yf_decoder0" "yongfeng_decoder")
+(define_cpu_unit "yf_decoder1" "yongfeng_decoder")
+(define_cpu_unit "yf_decoder2" "yongfeng_decoder")
+(define_cpu_unit "yf_decoder3" "yongfeng_decoder")
+
+;; We first wish to find an instruction for yf_decoder0, so exclude
+;; other decoders from being reserved until yf_decoder0 is
+;; reserved
+(presence_set "yf_decoder1" "yf_decoder0")
+(presence_set "yf_decoder2" "yf_decoder0")
+(presence_set "yf_decoder3" "yf_decoder0")
+
+;; Most instructions can be decoded on any of the four decoders.
+(define_reservation "yf_decodern" "yf_decoder0|yf_decoder1|yf_decoder2|yf_decoder3")
+(define_reservation "yf_decoder012" "yf_decoder0|yf_decoder1|yf_decoder2")
+
+;; The out-of-order core has ten pipelines. Port 0,1,2,3 are integer execution
+;; pipelines, port 4, 5 are responsible for address calculation, load and store,
+;; port 6,7,8,9 are FP pipelines.
+(define_cpu_unit "yf_p0,yf_p1,yf_p2,yf_p3" "yongfeng_ieu")
+(define_cpu_unit "yf_p4,yf_p5" "yongfeng_agu")
+(define_cpu_unit "yf_p6,yf_p7,yf_p8,yf_p9" "yongfeng_fp")
+
+(define_cpu_unit "yf_idiv" "yongfeng_idiv")
+(define_cpu_unit "yf_fdiv" "yongfeng_fdiv")
+
+(define_reservation "yf_ieu" "yf_p0|yf_p1|yf_p2|yf_p3")
+(define_reservation "yf_p01" "yf_p0|yf_p1")
+(define_reservation "yf_agu" "yf_p4|yf_p5")
+(define_reservation "yf_feu" "yf_p6|yf_p7|yf_p8|yf_p9")
+
+;; Only the irregular instructions have to be modeled here.
+
+;; Complex instruction.
+(define_insn_reservation "yongfeng_complex_insn" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "other,multi,str"))
+ "yf_decoder0")
+
+;; Call instruction.
+(define_insn_reservation "yongfeng_call" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "call,callv"))
+ "yf_decoder012,yf_agu,yf_ieu*3")
+;; Push and pop.
+(define_insn_reservation "yongfeng_push_reg" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "push")))
+ "yf_decodern,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_push_mem" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "push")))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_pop_reg" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "pop")))
+ "yf_decoder012,yf_p01,yf_agu")
+
+(define_insn_reservation "yongfeng_pop_mem" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "pop")))
+ "yf_decoder0,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_leave" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "leave"))
+ "yf_decoder0,yf_agu,yf_p01*3")
+
+;; MOV - integer moves.
+(define_insn_reservation "yongfeng_imov" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imov,imovx")))
+ "yf_decodern,yf_ieu")
+
+(define_insn_reservation "yongfeng_imov_load" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imov")))
+ "yf_decodern,yf_agu")
+
+;; imovx insns that load from memory: decoded on decoder 0, 1 or 2, then
+;; an AGU pipe, then one integer pipe.
+;; NOTE(review): the last step was written "yf_ieu|yf_ieu", a duplicated
+;; alternative that means the same as "yf_ieu".  If two integer pipes
+;; are meant to be occupied together it must be "yf_ieu+yf_ieu" -- confirm.
+(define_insn_reservation "yongfeng_imovx_load" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imovx")))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_imov_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "imov")))
+ "yf_decodern,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_int_insn" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "alu,alu1,icmov,icmp,test,lea,ishift1,rotate,rotate1,setcc,incdec")))
+ "yf_decodern,yf_ieu")
+
+(define_insn_reservation "yongfeng_int_insn_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "alu,alu1,icmov,icmp,test,ishift1,rotate,rotate1,setcc")))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_int_insn_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc")))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_int_insn_both" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc,incdec")))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_shift_HI" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "ishift"))))
+ "yf_decoder0,yf_ieu")
+
+(define_insn_reservation "yongfeng_shift_SIDI" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "ishift"))))
+ "yf_decoder0,yf_ieu")
+
+(define_insn_reservation "yongfeng_shift_HI_mem" 9
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "ishift"))))
+ "yf_decoder0,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_shift_SIDI_mem" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "ishift"))))
+ "yf_decoder0,yf_agu,yf_ieu")
+
+;; QImode/HImode negnot insns without a memory operand: decoded on
+;; decoder 0, 1 or 2, then one integer pipe.
+;; NOTE(review): the last step was written "yf_ieu|yf_ieu", a duplicated
+;; alternative that means the same as "yf_ieu".  If two integer pipes
+;; are meant to be occupied together it must be "yf_ieu+yf_ieu" -- confirm.
+(define_insn_reservation "yongfeng_negnot_QIHI" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "QI,HI")
+ (eq_attr "type" "negnot"))))
+ "yf_decoder012,yf_ieu")
+
+(define_insn_reservation "yongfeng_negnot_SIDI" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "negnot"))))
+ "yf_decodern,yf_ieu")
+
+(define_insn_reservation "yongfeng_negnot_QIHI_mem" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "QI,HI")
+ (eq_attr "type" "negnot"))))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_negnot_SIDI_mem" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "negnot"))))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+;; branch instruction
+(define_insn_reservation "yongfeng_branch" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ibr")))
+ "yf_decodern,yf_p2*3")
+
+(define_insn_reservation "yongfeng_branch_mem" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ibr")))
+ "yf_decodern,yf_agu,yf_p2")
+
+;; Integer Multiplication instructions.
+
+;; QImode imul insns without a memory operand: any decoder, then one
+;; integer pipe.
+;; NOTE(review): the last step was written "yf_ieu|yf_ieu", a duplicated
+;; alternative that means the same as "yf_ieu".  If two integer pipes
+;; are meant to be occupied together it must be "yf_ieu+yf_ieu" -- confirm.
+(define_insn_reservation "yongfeng_imul_QI" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "imul"))))
+ "yf_decodern,yf_ieu")
+
+(define_insn_reservation "yongfeng_imul_HI" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "imul"))))
+ "yf_decoder0,yf_ieu")
+
+;; SImode/DImode imul insns without a memory operand: decoder 0 only,
+;; then one integer pipe.
+;; NOTE(review): the last step was written "yf_ieu|yf_ieu", a duplicated
+;; alternative that means the same as "yf_ieu".  If two integer pipes
+;; are meant to be occupied together it must be "yf_ieu+yf_ieu" -- confirm.
+(define_insn_reservation "yongfeng_imul_SIDI" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "imul"))))
+ "yf_decoder0,yf_ieu")
+
+(define_insn_reservation "yongfeng_imul_QI_mem" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "imul"))))
+ "yf_decoder012,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_imul_SIDI_mem" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "type" "imul"))))
+ "yf_decoder0,yf_agu,yf_ieu")
+
+(define_insn_reservation "yongfeng_imul_HI_mem" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "imul"))))
+ "yf_decoder0,yf_agu,yf_ieu")
+
+;; Integer Division instructions.
+
+(define_insn_reservation "yongfeng_idiv_DI" 41
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*41")
+
+(define_insn_reservation "yongfeng_idiv_HI" 9
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*3")
+
+(define_insn_reservation "yongfeng_idiv_QISI" 8
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI,SI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*3")
+
+
+(define_insn_reservation "yongfeng_idiv_mem_DI" 45
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*41")
+
+(define_insn_reservation "yongfeng_idiv_HI_mem" 13
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3")
+
+
+(define_insn_reservation "yongfeng_idiv_QISI_mem" 12
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "QI,SI")
+ (eq_attr "type" "idiv"))))
+ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3")
+
+;; MMX,SSE,AVX,AVX2 instructions
+;; sse moves
+
+(define_insn_reservation "yongfeng_sse_mov" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssemov"))))
+ "yf_decodern,yf_feu")
+
+(define_insn_reservation "yongfeng_sse_mov_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "ssemov"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+(define_insn_reservation "yongfeng_sse_mov_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssemov"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+(define_insn_reservation "yongfeng_avx256_mov" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssemov"))))
+ "yf_decoder012,yf_feu")
+
+(define_insn_reservation "yongfeng_avx256_mov_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "ssemov"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+(define_insn_reservation "yongfeng_avx256_mov_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssemov"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;;sse general instructions
+;; sseadd/sseadd1/ssemul/ssecmp insns in SF, DF, V4SF, V2DF or TI mode
+;; without a memory operand: any decoder, then one FP pipe.
+;; NOTE(review): the last step was written "yf_feu|yf_feu", a duplicated
+;; alternative that means the same as "yf_feu".  If two FP pipes are
+;; meant to be occupied together it must be "yf_feu+yf_feu" -- confirm.
+(define_insn_reservation "yongfeng_sse_insns" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
+ "yf_decodern,yf_feu")
+
+(define_insn_reservation "yongfeng_sse_insns_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+(define_insn_reservation "yongfeng_avx256_insns" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
+ "yf_decoder012,yf_feu")
+
+(define_insn_reservation "yongfeng_avx256_insns_load" 8
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+(define_insn_reservation "yongfeng_sse_iadd" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI,TI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseiadd"))))
+ "yf_decodern,yf_feu")
+
+(define_insn_reservation "yongfeng_sse_iadd_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI,TI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseiadd"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+;; "sseiadd" in OI (256-bit) mode with no memory operand: default latency 1.
+(define_insn_reservation "yongfeng_avx256_iadd" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseiadd"))))
+ "yf_decoder012,yf_feu")
+
+;; "sseiadd" in OI mode with a memory load: default latency 6.
+;; Unlike the register form, this one reserves yf_decoder0 rather than
+;; yf_decoder012.
+(define_insn_reservation "yongfeng_avx256_iadd_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseiadd"))))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; "sseiadd1" with no memory operand (any mode): default latency 2.
+(define_insn_reservation "yongfeng_sse_iadd1" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseiadd1")))
+ "yf_decoder0,yf_feu")
+
+;; "sseiadd1" with a memory load (any mode): default latency 6,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_iadd1_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseiadd1")))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;;sse imul
+;; "sseimul" in DI/TI mode with no memory operand: default latency 2.
+(define_insn_reservation "yongfeng_sse_imul" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI,TI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseimul"))))
+ "yf_decodern,yf_feu")
+
+;; "sseimul" in DI/TI mode with a memory load: default latency 6.
+;; Reserves yf_decoder012 (not yf_decodern), then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_sse_imul_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI,TI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseimul"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "sseimul" in OI (256-bit) mode with no memory operand: default latency 2.
+(define_insn_reservation "yongfeng_avx256_imul" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseimul"))))
+ "yf_decoder012,yf_feu")
+
+;; "sseimul" in OI mode with a memory load: default latency 7.
+;; Reserves yf_decoder0, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_avx256_imul_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseimul"))))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; sse FMA
+;; "ssemuladd" in SF/DF/V4SF/V2DF mode with no memory operand:
+;; default latency 5.
+(define_insn_reservation "yongfeng_sse_fma" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssemuladd"))))
+ "yf_decodern,yf_feu")
+
+;; "ssemuladd" in SF/DF/V4SF/V2DF mode with a memory load:
+;; default latency 9.  Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_sse_fma_load" 9
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssemuladd"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "ssemuladd" in V8SF/V4DF (256-bit) mode with no memory operand:
+;; default latency 5.
+(define_insn_reservation "yongfeng_avx256_fma" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssemuladd"))))
+ "yf_decoder012,yf_feu")
+
+;; "ssemuladd" in V8SF/V4DF mode with a memory load: default latency 10.
+;; Reserves yf_decoder0, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_avx256_fma_load" 10
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssemuladd"))))
+ "yf_decoder0,yf_agu,yf_feu")
+;; sse div
+;; "ssediv" in SF/V4SF mode with no memory operand: default latency 10;
+;; yf_fdiv is reserved for 2 consecutive cycles.
+(define_insn_reservation "yongfeng_ssediv_s" 10
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,V4SF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssediv"))))
+ "yf_decodern,yf_fdiv*2")
+
+;; "ssediv" in SF/V4SF mode with a memory load: default latency 14;
+;; yf_agu is reserved before the 2-cycle yf_fdiv reservation.
+(define_insn_reservation "yongfeng_ssediv_s_load" 14
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SF,V4SF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssediv"))))
+ "yf_decodern,yf_agu,yf_fdiv*2")
+
+;; "ssediv" in DF/V2DF mode with no memory operand: default latency 14;
+;; yf_fdiv is reserved for 3 consecutive cycles.
+(define_insn_reservation "yongfeng_ssediv_d" 14
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DF,V2DF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssediv"))))
+ "yf_decodern,yf_fdiv*3")
+
+;; "ssediv" in DF/V2DF mode with a memory load: default latency 18;
+;; yf_agu is reserved before the 3-cycle yf_fdiv reservation.
+(define_insn_reservation "yongfeng_ssediv_d_load" 18
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DF,V2DF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssediv"))))
+ "yf_decodern,yf_agu,yf_fdiv*3")
+
+;; "ssediv" in V8SF (256-bit single precision) mode with no memory operand:
+;; default latency 10; yf_fdiv is reserved for 10 consecutive cycles.
+(define_insn_reservation "yongfeng_ssediv_avx256_s" 10
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssediv"))))
+ "yf_decoder012,yf_fdiv*10")
+
+;; "ssediv" in V8SF (256-bit single precision) mode with a memory load:
+;; default latency 15; yf_agu is reserved before the 10-cycle yf_fdiv
+;; reservation.  The condition must test memory "load": testing "none"
+;; would duplicate the register-form reservation's condition and leave
+;; V8SF divides with a memory source without this reservation.
+(define_insn_reservation "yongfeng_ssediv_avx256_s_load" 15
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssediv"))))
+ "yf_decoder012,yf_agu,yf_fdiv*10")
+
+;; "ssediv" in V4DF (256-bit double precision) mode with no memory operand:
+;; default latency 14; yf_fdiv is reserved for 14 consecutive cycles.
+(define_insn_reservation "yongfeng_ssediv_avx256_d" 14
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssediv"))))
+ "yf_decoder012,yf_fdiv*14")
+
+;; "ssediv" in V4DF (256-bit double precision) mode with a memory load:
+;; default latency 19.  As with the other load-form reservations in this
+;; description, yf_agu is reserved between the decoder and the execution
+;; unit; yf_fdiv is then reserved for 14 consecutive cycles.
+(define_insn_reservation "yongfeng_ssediv_avx256_d_load" 19
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssediv"))))
+ "yf_decoder012,yf_agu,yf_fdiv*14")
+
+;;sse logical and shuffle instructions
+;; 256-bit (V8SF/V4DF/OI) sselog/sselog1/sseshuf/sseshuf1 with no memory
+;; operand: default latency 1.
+(define_insn_reservation "yongfeng_avx256_log_shuf" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))))
+ "yf_decoder012,yf_feu")
+
+;; Same instruction classes and modes with a memory load: default latency 6,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_avx256_log_shuf_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; sselog/sselog1/sseshuf/sseshuf1 with no memory operand and no mode
+;; test: default latency 1.  The condition itself does not exclude the
+;; 256-bit modes handled by the yongfeng_avx256_log_shuf reservations.
+(define_insn_reservation "yongfeng_sse_log_shuf" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))
+ "yf_decodern,yf_feu")
+
+;; Same instruction classes with a memory load: default latency 5,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_log_shuf_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))
+ "yf_decodern,yf_agu,yf_feu")
+;;sse shift
+
+;; 256-bit (V8SF/V4DF/OI) sseishft/sseishft1 with no memory operand:
+;; default latency 1.
+(define_insn_reservation "yongfeng_avx256_shift" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseishft,sseishft1"))))
+ "yf_decoder012,yf_feu")
+
+;; Same instruction classes and modes with a memory load: default latency 6.
+;; Reserves yf_decoder0, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_avx256_shift_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseishft,sseishft1"))))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; sseishft/sseishft1 with no memory operand and no mode test:
+;; default latency 1.
+(define_insn_reservation "yongfeng_sse_shift" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseishft,sseishft1")))
+ "yf_decodern,yf_feu")
+
+;; sseishft/sseishft1 with a memory load and no mode test:
+;; default latency 5, with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_shift_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseishft,sseishft1")))
+ "yf_decodern,yf_agu,yf_feu")
+;;sse comi
+;; 256-bit (V8SF/V4DF/OI) "ssecomi" with prefix_extra = 1 and no memory
+;; operand: default latency 4; yf_ieu is reserved for 3 consecutive cycles.
+(define_insn_reservation "yongfeng_avx256_test" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "prefix_extra" "1")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecomi")))))
+ "yf_decoder012,yf_ieu*3")
+
+;; Same condition with a memory load: default latency 9.
+;; NOTE(review): the register form reserves yf_ieu for 3 cycles, whereas
+;; this form reserves yf_agu, then yf_ieu for one cycle, then yf_p6 for
+;; 3 cycles -- confirm the asymmetry is intended.
+(define_insn_reservation "yongfeng_avx256_test_load" 9
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (and (eq_attr "prefix_extra" "1")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecomi")))))
+ "yf_decoder012,yf_agu,yf_ieu,yf_p6*3")
+
+;; "ssecomi" with prefix_extra = 1, no memory operand and no mode test:
+;; default latency 3.  The alternation "yf_feu|yf_feu" names the same
+;; unit on both sides.
+(define_insn_reservation "yongfeng_sse_test" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "prefix_extra" "1")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecomi"))))
+ "yf_decodern,yf_feu|yf_feu")
+
+;; "ssecomi" with prefix_extra = 1 and a memory load: default latency 7,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_test_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "prefix_extra" "1")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecomi"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+;; "ssecomi" with prefix_extra = 0 and no memory operand:
+;; default latency 1.  The alternation "yf_feu|yf_feu" names the same
+;; unit on both sides.
+(define_insn_reservation "yongfeng_sse_comi" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "prefix_extra" "0")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecomi"))))
+ "yf_decodern,yf_feu|yf_feu")
+
+;; "ssecomi" with prefix_extra = 0 and a memory load: default latency 4,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_comi_load" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "prefix_extra" "0")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecomi"))))
+ "yf_decodern,yf_agu,yf_feu")
+
+;;sse conversion
+;; "ssecvt" in V4SF mode with no memory operand: default latency 4.
+(define_insn_reservation "yongfeng_avx_cvt_ps" 4
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4SF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecvt"))))
+ "yf_decoder0,yf_feu")
+
+;; "ssecvt" in V4SF mode with a memory load: default latency 8,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_avx_cvt_ps_load" 8
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4SF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecvt"))))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; "ssecvt" in V4DF mode with no memory operand: default latency 3.
+(define_insn_reservation "yongfeng_avx_cvt_pd" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecvt"))))
+ "yf_decoder0,yf_feu")
+
+;; "ssecvt" in V4DF mode with a memory load: default latency 7,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_avx_cvt_pd_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecvt"))))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; "ssecvt" with no memory operand and no mode test: default latency 3.
+;; The alternation "yf_feu|yf_feu" names the same unit on both sides.
+(define_insn_reservation "yongfeng_sse_cvt" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssecvt")))
+ "yf_decodern,yf_feu|yf_feu")
+
+;; "ssecvt" with a memory load and no mode test: default latency 7.
+;; Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_sse_cvt_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssecvt")))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "sseicvt" with no memory operand and no mode test: default latency 3.
+;; NOTE(review): this mode-less condition also matches the SI and DI insns
+;; targeted by yongfeng_sse_icvt_SI / yongfeng_sse_icvt_DI, which appear
+;; later in the file.  If the generator resolves overlapping reservations
+;; by definition order, those later entries would never be selected --
+;; confirm the intended ordering.
+(define_insn_reservation "yongfeng_sse_icvt" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseicvt")))
+ "yf_decodern,yf_feu|yf_feu")
+
+;; "sseicvt" with a memory load and no mode test: default latency 7.
+;; Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_sse_icvt_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseicvt")))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "sseicvt" in SI mode with no memory operand: default latency 1.
+(define_insn_reservation "yongfeng_sse_icvt_SI" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseicvt"))))
+ "yf_decoder012,yf_feu")
+
+;; "sseicvt" in SI mode with a memory load: default latency 5,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_icvt_SI_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "SI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseicvt"))))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "sseicvt" in DI mode with no memory operand: default latency 2.
+(define_insn_reservation "yongfeng_sse_icvt_DI" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseicvt"))))
+ "yf_decoder0,yf_feu")
+
+;; "sseicvt" in DI mode with a memory load: default latency 6,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_sse_icvt_DI_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "mode" "DI")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseicvt"))))
+ "yf_decoder0,yf_agu,yf_feu")
+;; MMX
+;; "mmxmov" with no memory operand: default latency 1; executes on yf_p0.
+(define_insn_reservation "yongfeng_mmx_move" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmov")))
+ "yf_decodern,yf_p0")
+
+;; "mmxmov" with a memory load: default latency 5; yf_agu then yf_p0.
+(define_insn_reservation "yongfeng_mmx_move_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxmov")))
+ "yf_decodern,yf_agu,yf_p0")
+
+;; "mmxmov" with a memory store: default latency 1; yf_agu then yf_p0.
+(define_insn_reservation "yongfeng_mmx_move_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "mmxmov")))
+ "yf_decodern,yf_agu,yf_p0")
+
+;; "mmxmul" with no memory operand: default latency 2.
+(define_insn_reservation "yongfeng_mmx_mul" 2
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul")))
+ "yf_decodern,yf_feu")
+
+;; "mmxmul" with a memory load: default latency 6.
+;; Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_mmx_mul_load" 6
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxmul")))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; MMX general instructions
+;; mmxadd/mmxshft/mmxcmp/mmx/mmxcvt with no memory operand:
+;; default latency 1.  The alternation "yf_feu|yf_feu" names the same
+;; unit on both sides.
+(define_insn_reservation "yongfeng_mmx_insns" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
+ "yf_decodern,yf_feu|yf_feu")
+
+;; Same instruction classes with a memory load: default latency 5,
+;; with yf_agu reserved before the execution unit.
+(define_insn_reservation "yongfeng_mmx_insns_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
+ "yf_decodern,yf_agu,yf_feu|yf_feu")
+
+;; Same instruction classes with a memory store: default latency 1;
+;; yf_agu then yf_feu.
+(define_insn_reservation "yongfeng_mmx_insns_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
+ "yf_decodern,yf_agu,yf_feu")
+
+;; x87 floating point operations.
+
+;; "fxch" (no memory test): default latency 1; after decode it takes
+;; either yf_p0 or yf_p1.
+(define_insn_reservation "yongfeng_fxch" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "fxch"))
+ "yf_decodern,yf_p0|yf_p1")
+
+;; "fcmov" and "fsgn" (no memory test): default latency 1; after decode
+;; it takes either yf_p0 or yf_p1, followed by yf_feu.
+(define_insn_reservation "yongfeng_fcmov_sgn" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "fcmov,fsgn"))
+ "yf_decodern,yf_p0|yf_p1,yf_feu")
+
+;; "fcmp" with no memory operand: default latency 1.
+(define_insn_reservation "yongfeng_fcmp" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fcmp")))
+ "yf_decodern,yf_feu")
+
+;; "fcmp" with a memory load: default latency 5,
+;; with yf_agu reserved before yf_feu.
+(define_insn_reservation "yongfeng_fcmp_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fcmp")))
+ "yf_decodern,yf_agu,yf_feu")
+
+;; "fmov" with no memory operand: default latency 1.
+(define_insn_reservation "yongfeng_fmov" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmov")))
+ "yf_decodern,yf_feu")
+
+;; "fmov" with a memory store: default latency 1.
+;; Reserves yf_decoder0, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_fmov_store" 1
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fmov")))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; "fmov" with a memory load: default latency 5.
+;; Same unit sequence as the store form.
+(define_insn_reservation "yongfeng_fmov_load" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fmov")))
+ "yf_decoder0,yf_agu,yf_feu")
+
+;; "fistp" and "fisttp" (no memory test): default latency 5.
+;; Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_fistp" 5
+ (and (eq_attr "cpu" "yongfeng")
+ (eq_attr "type" "fistp,fisttp"))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "fop" and "fmul" whose memory attribute is "none" or "unknown":
+;; default latency 3.
+(define_insn_reservation "yongfeng_fop_mul" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "fop,fmul")))
+ "yf_decodern,yf_feu")
+
+;; "fop" and "fmul" whose memory attribute is "load" or "both":
+;; default latency 7.  Reserves yf_decoder012, then yf_agu, then yf_feu.
+(define_insn_reservation "yongfeng_fop_mul_load" 7
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load,both")
+ (eq_attr "type" "fop,fmul")))
+ "yf_decoder012,yf_agu,yf_feu")
+
+;; "fop" with a memory store: default latency 3.  Only "fop" is tested
+;; here, not "fmul".
+;; NOTE(review): this reservation is named with a "yf_" prefix, unlike the
+;; "yongfeng_" prefix used by the neighbouring reservations; left as-is
+;; because other parts of the description may refer to it by name.
+(define_insn_reservation "yf_fop_store" 3
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fop")))
+ "yf_decodern,yf_agu,yf_feu")
+
+;; "fdiv" and "fpspc" with no memory operand: default latency 14;
+;; yf_fdiv is reserved for 7 consecutive cycles.
+(define_insn_reservation "yongfeng_fdiv_fpspc" 14
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fdiv,fpspc")))
+ "yf_decodern,yf_fdiv*7")
+
+;; "fdiv" and "fpspc" with a memory load: default latency 18.
+;; Reserves yf_decoder012, then yf_agu, then yf_fdiv for 7 cycles.
+(define_insn_reservation "yongfeng_fdiv_fpspc_load" 18
+ (and (eq_attr "cpu" "yongfeng")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fdiv,fpspc")))
+ "yf_decoder012,yf_agu,yf_fdiv*7")