;; HYGON c86-4g-m7 Scheduling
;; Modeling automatons for decoders, integer execution pipes,
;; AGU pipes, branch, floating point execution, fp store units,
-;; integer and floating point dividers.
-(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu, c86_4g_m7_idiv, c86_4g_m7_fdiv")
+;; integer and floating point dividers.  fpu1 and fpu3 are split
+;; into a separate automaton (c86_4g_m7_fpu13) to keep these units
+;; independent without increasing the state space of the automaton
+;; holding fpu0 and fpu2 (c86_4g_m7_fpu02).
+(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu02, c86_4g_m7_fpu13, c86_4g_m7_idiv, c86_4g_m7_fdiv")
;; Decoders unit has 4 decoders and all of them can decode fast path
;; and vector type instructions.
(define_cpu_unit "c86-4g-m7-decode2" "c86_4g_m7")
(define_cpu_unit "c86-4g-m7-decode3" "c86_4g_m7")
-;; Two separated dividers for int and fp.
-(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv")
-(define_cpu_unit "c86-4g-m7-fdiv" "c86_4g_m7_fdiv")
-
;; Currently blocking all decoders for vector path instructions as
;; they are dispatched separately as a microcode sequence.
(define_reservation "c86-4g-m7-vector" "c86-4g-m7-decode0+c86-4g-m7-decode1+c86-4g-m7-decode2+c86-4g-m7-decode3")
(define_cpu_unit "c86-4g-m7-ieu2" "c86_4g_m7_ieu")
(define_cpu_unit "c86-4g-m7-ieu3" "c86_4g_m7_ieu")
+;; One separate integer divider.
+(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv")
+
;; c86-4g-m7 has an additional branch unit.
(define_cpu_unit "c86-4g-m7-bru0" "c86_4g_m7_ieu")
(define_reservation "c86-4g-m7-ieu" "c86-4g-m7-ieu0|c86-4g-m7-ieu1|c86-4g-m7-ieu2|c86-4g-m7-ieu3")
;; vectorpath (microcoded) instructions are single issue instructions.
;; So, they occupy all the integer units.
(define_reservation "c86-4g-m7-ivector" "c86-4g-m7-ieu0+c86-4g-m7-ieu1
- +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0
- +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2")
+ +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0
+ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2")
;; Floating point unit 4 FP pipes.
-(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu")
-(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu")
-(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu")
-(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu")
+(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu02")
+(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu13")
+(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu02")
+(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu13")
+
(define_reservation "c86-4g-m7-fpu" "c86-4g-m7-fpu0|c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3")
-(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2")
-(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3")
(define_reservation "c86-4g-m7-fpu_0_1" "c86-4g-m7-fpu0|c86-4g-m7-fpu1")
+(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2")
(define_reservation "c86-4g-m7-fpu_0_2x2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu2*2")
(define_reservation "c86-4g-m7-fpu_0_2x4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu2*4")
+;; Naming of the reservations below: "fpu_A_B" takes either pipe A or
+;; pipe B; an "xN" suffix keeps the chosen pipe reserved for N
+;; consecutive cycles; "fpuxN" does the same over all four pipes.
+(define_reservation "c86-4g-m7-fpu_0_3" "c86-4g-m7-fpu0|c86-4g-m7-fpu3")
+(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3")
+(define_reservation "c86-4g-m7-fpu_1_3x2" "c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2")
+(define_reservation "c86-4g-m7-fpu_1_3x3" "c86-4g-m7-fpu1*3|c86-4g-m7-fpu3*3")
+(define_reservation "c86-4g-m7-fpu_1_3x6" "c86-4g-m7-fpu1*6|c86-4g-m7-fpu3*6")
+(define_reservation "c86-4g-m7-fpux2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu1*2|c86-4g-m7-fpu2*2|c86-4g-m7-fpu3*2")
+(define_reservation "c86-4g-m7-fpux4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu1*4|c86-4g-m7-fpu2*4|c86-4g-m7-fpu3*4")
+(define_reservation "c86-4g-m7-fpux8" "c86-4g-m7-fpu0*8|c86-4g-m7-fpu1*8|c86-4g-m7-fpu2*8|c86-4g-m7-fpu3*8")
+(define_reservation "c86-4g-m7-fpux6" "c86-4g-m7-fpu0*6|c86-4g-m7-fpu1*6|c86-4g-m7-fpu2*6|c86-4g-m7-fpu3*6")
+(define_reservation "c86-4g-m7-fpux16" "c86-4g-m7-fpu0*16|c86-4g-m7-fpu1*16|c86-4g-m7-fpu2*16|c86-4g-m7-fpu3*16")
(define_reservation "c86-4g-m7-fvector" "c86-4g-m7-fpu0+c86-4g-m7-fpu1
- +c86-4g-m7-fpu2+c86-4g-m7-fpu3
- +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2")
+ +c86-4g-m7-fpu2+c86-4g-m7-fpu3
+ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2")
+
+;; Two FP dividers.  In the reservations below fdiv1 is used on the
+;; fpu1 path and fdiv3 on the fpu3 path.
+(define_cpu_unit "c86-4g-m7-fdiv1" "c86_4g_m7_fdiv")
+(define_cpu_unit "c86-4g-m7-fdiv3" "c86_4g_m7_fdiv")
+
+;; One pipe and its divider reserved together for 4 consecutive cycles.
+(define_reservation "c86-4g-m7-fp1fdiv1x4" "(c86-4g-m7-fpu1+c86-4g-m7-fdiv1)*4")
+(define_reservation "c86-4g-m7-fp3fdiv3x4" "(c86-4g-m7-fpu3+c86-4g-m7-fdiv3)*4")
+;; Both dividers at once; both pipes plus both dividers at once; and
+;; the latter held for 4 consecutive cycles.  These are used by the
+;; V8SF/V4DF and V16SF/V8DF divide and sqrt reservations.
+(define_reservation "c86-4g-m7-fdiv13" "(c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)")
+(define_reservation "c86-4g-m7-fp13div13" "(c86-4g-m7-fpu1+c86-4g-m7-fpu3+c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)")
+(define_reservation "c86-4g-m7-fp13div13x4" "c86-4g-m7-fp13div13*4")
+;; "fp1div1_fp3div3_x4xN": on either the fpu1/fdiv1 path or the
+;; fpu3/fdiv3 path, hold pipe and divider together for 4 cycles,
+;; then hold the divider alone for N more cycles.
+(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x8" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*8)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*8)")
+(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x9" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*9)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*9)")
+(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x11" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*11)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*11)")
+(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x15" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*15)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*15)")
+(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x18" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*18)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*18)")
;; IMOV/IMOVX
(define_insn_reservation "c86_4g_m7_imov_xchg" 1
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu1")
;; IDIV
-(define_insn_reservation "c86_4g_m7_idiv_DI" 41
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "DI")
- (eq_attr "memory" "none"))))
- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*41")
-
-(define_insn_reservation "c86_4g_m7_idiv_SI" 25
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "SI")
- (eq_attr "memory" "none"))))
- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*25")
-
-(define_insn_reservation "c86_4g_m7_idiv_HI" 17
+(define_insn_reservation "c86_4g_m7_idiv" 7
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "HI")
+ (and (eq_attr "mode" "!QI")
(eq_attr "memory" "none"))))
- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*17")
+ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*7")
-(define_insn_reservation "c86_4g_m7_idiv_QI" 15
+(define_insn_reservation "c86_4g_m7_idiv_QI" 6
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "none"))))
- "c86-4g-m7-direct,c86-4g-m7-ieu3,c86-4g-m7-idiv*15")
-
-(define_insn_reservation "c86_4g_m7_idiv_DI_load" 45
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "DI")
- (eq_attr "memory" "load"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*41")
-
-(define_insn_reservation "c86_4g_m7_idiv_SI_load" 29
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "SI")
- (eq_attr "memory" "load"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*25")
+ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*6")
-(define_insn_reservation "c86_4g_m7_idiv_HI_load" 21
+(define_insn_reservation "c86_4g_m7_idiv_load" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "idiv")
- (and (eq_attr "mode" "HI")
+ (and (eq_attr "mode" "!QI")
(eq_attr "memory" "load"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*17")
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*7")
-(define_insn_reservation "c86_4g_m7_idiv_QI_load" 19
+(define_insn_reservation "c86_4g_m7_idiv_QI_load" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "load"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*15")
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*6")
;; Integer/general Instructions
(define_insn_reservation "c86_4g_m7_insn" 1
(and (eq_attr "type" "sseins")
(and (eq_attr "memory" "none")
(eq_attr "length_immediate" "2"))))
- "c86-4g-m7-double,c86-4g-m7-fpu0|c86-4g-m7-fpu3,c86-4g-m7-fpu1")
+ "c86-4g-m7-double,c86-4g-m7-fpu_0_3,c86-4g-m7-fpu1")
(define_insn_reservation "c86_4g_m7_sse_insert" 3
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "sseins")
(and (eq_attr "memory" "none")
(eq_attr "length_immediate" "!2"))))
- "c86-4g-m7-direct,c86-4g-m7-fpu1")
+ "c86-4g-m7-direct,c86-4g-m7-fpu1*2")
;; FCMOV
(define_insn_reservation "c86_4g_m7_fp_cmov" 4
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "fpspc")
(eq_attr "c86_attr" "sqrt")))
- "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*22")
+ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x18")
;; FPSPC
(define_insn_reservation "c86_4g_m7_fp_spc_direct" 5
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
- "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15")
+ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x11")
(define_insn_reservation "c86_4g_m7_fp_div_load" 22
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "false")
(eq_attr "memory" "!none"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x11")
(define_insn_reservation "c86_4g_m7_fp_idiv_load" 26
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "!none"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15")
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1*4,c86-4g-m7-fp1div1_fp3div3_x4x11")
(define_insn_reservation "c86_4g_m7_fp_fsgn" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "insr")
(and (eq_attr "prefix" "orig")
(eq_attr "memory" "none")))))
- "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu_0_1")
+ "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu")
(define_insn_reservation "c86_4g_m7_sse_pinsr_reg_load" 3
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "insr")
(and (eq_attr "prefix" "orig")
(eq_attr "memory" "load")))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu")
(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg" 2
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "insr")
(and (eq_attr "prefix" "!orig")
(eq_attr "memory" "none")))))
- "c86-4g-m7-double,c86-4g-m7-fpu2*2")
+ "c86-4g-m7-double,c86-4g-m7-fpu_1_3x2")
(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg_load" 8
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "insr")
(and (eq_attr "prefix" "!orig")
(eq_attr "memory" "load")))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3")
;; PERM
(define_insn_reservation "c86_4g_m7_avx512_perm_xmm" 3
(eq_attr "mode" "V4SF,V2DF,TI"))
(and (eq_attr "c86_attr" "perm")
(eq_attr "mode" "V8SF,V4DF,TI,OI")))
- (and (eq_attr "prefix" "evex")
- (eq_attr "memory" "none")))))
+ (eq_attr "memory" "none"))))
"c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2")
(define_insn_reservation "c86_4g_m7_avx512_perm_xmm_opload" 10
(eq_attr "mode" "V4SF,V2DF,TI"))
(and (eq_attr "c86_attr" "perm")
(eq_attr "mode" "V8SF,V4DF,TI,OI")))
- (and (eq_attr "prefix" "evex")
- (eq_attr "memory" "load")))))
+ (eq_attr "memory" "load"))))
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2")
(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm" 4
(and (eq_attr "c86_attr" "perm2")
(and (eq_attr "mode" "V8SF,V4DF,OI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpux4")
(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm" 16
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "perm2")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpux16")
(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm_load" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "perm2")
(and (eq_attr "mode" "V8SF,V4DF,OI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux4")
(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm_load" 23
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "perm2")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux16")
(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm" 4
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (match_operand 2 "immediate_operand")
(eq_attr "memory" "none"))))))
- "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x4")
+ "c86-4g-m7-direct,c86-4g-m7-fpux4")
(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm_load" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (match_operand 2 "immediate_operand")
(eq_attr "memory" "load"))))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpux4")
(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_noimm" 8
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (match_operand 2 "nonimmediate_operand")
(eq_attr "memory" "none"))))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpux8")
(define_insn_reservation "c86_4g_m7_sse_perm_zmm_noimm_load" 15
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (match_operand 2 "nonimmediate_operand")
(eq_attr "memory" "load"))))))
- "c86-4g-m7-vector,c86-4g-m7-load")
-
-(define_insn_reservation "c86_4g_m7_avx_perm_ymm" 3
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "sselog")
- (and (eq_attr "c86_attr" "perm")
- (and (eq_attr "prefix" "!evex")
- (eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
-
-(define_insn_reservation "c86_4g_m7_avx_perm_ymem" 10
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "sselog")
- (and (eq_attr "c86_attr" "perm")
- (and (eq_attr "prefix" "!evex")
- (eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux8")
;; VINSERT
(define_insn_reservation "c86_4g_m7_avx512_insertx_ymm" 3
(and (eq_attr "c86_attr" "shufx")
(and (eq_attr "mode" "V8DF,V16SF,XI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2x4")
(define_insn_reservation "c86_4g_m7_avx512_shuf_xymem" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "shufx")
(and (eq_attr "mode" "V8DF,V16SF,XI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4")
;; SSELOGIC
(define_insn_reservation "c86_4g_m7_sselogic_xymm" 1
(and (eq_attr "type" "sselog")
(and (eq_attr "c86_attr" "cmpestr")
(eq_attr "memory" "none"))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpux6")
(define_insn_reservation "c86_4g_m7_avx512_cmpestr_load" 13
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "sselog")
(and (eq_attr "c86_attr" "cmpestr")
(eq_attr "memory" "load"))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux6")
;; SSELOG
(define_insn_reservation "c86_4g_m7_avx512_log" 1
(and (eq_attr "c86_attr" "sadbw")
(and (eq_attr "mode" "XI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2")
(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmem" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "sadbw")
(and (eq_attr "mode" "XI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2")
;; ABS
(define_insn_reservation "c86_4g_m7_avx512_abs" 1
(and (eq_attr "type" "ssecomi")
(and (eq_attr "prefix_extra" "0")
(eq_attr "memory" "none"))))
- "c86-4g-m7-double,c86-4g-m7-fpu2|c86-4g-m7-fpu3")
+ "c86-4g-m7-double,c86-4g-m7-fpu")
(define_insn_reservation "c86_4g_m7_avx_ssecomi_comi_load" 8
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssecomi")
(and (eq_attr "prefix_extra" "0")
(eq_attr "memory" "load"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3")
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu")
(define_insn_reservation "c86_4g_m7_avx_ssecomi_test" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "expand,compress")
(and (not (eq_attr "mode" "XI,V16SF,V8DF"))
(eq_attr "memory" "none")))))
- "c86-4g-m7-direct,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2")
+ "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3")
(define_insn_reservation "c86_4g_m7_avx512_expand_load" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "expand,compress")
(and (not (eq_attr "mode" "XI,V16SF,V8DF"))
(eq_attr "memory" "load")))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3")
(define_insn_reservation "c86_4g_m7_avx512_expand_z" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "expand,compress")
(and (eq_attr "mode" "XI,V16SF,V8DF")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3")
(define_insn_reservation "c86_4g_m7_avx512_expand_z_load" 17
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "expand,compress")
(and (eq_attr "mode" "XI,V16SF,V8DF")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3")
;; MOVNT
(define_insn_reservation "c86_4g_m7_avx512_movnt_load" 8
(eq_attr "memory" "!none")))))
"c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1")
-(define_insn_reservation "c86_4g_m7_sse_movnt_xy" 4
+(define_insn_reservation "c86_4g_m7_sse_movnt" 4
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssemov")
(and (eq_attr "c86_attr" "movnt")
(and (eq_attr "type" "sseadd")
(and (eq_attr "c86_attr" "other")
(eq_attr "memory" "none"))))
- "c86-4g-m7-direct,c86-4g-m7-fpu3")
+ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_sseadd_xy_load" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "sseadd")
(and (eq_attr "c86_attr" "other")
(eq_attr "memory" "load"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3")
;; HADD/HSUB
(define_insn_reservation "c86_4g_m7_avx_sseadd_hplus" 7
(and (eq_attr "c86_attr" "hplus")
(and (eq_attr "prefix" "orig")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector,c86-4g-m7-fpu0*2")
+ "c86-4g-m7-vector,c86-4g-m7-fpux2")
(define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus_load" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_attr" "hplus")
(and (eq_attr "prefix" "orig")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu0*2")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux2")
;; SSEMUL
(define_insn_reservation "c86_4g_m7_avx512_ssemul" 3
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
- "c86-4g-m7-direct,c86-4g-m7-fpu0")
+ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2")
(define_insn_reservation "c86_4g_m7_avx512_ssemul_load" 10
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2")
;; SSEDIV
-(define_insn_reservation "c86_4g_m7_avx512_ssediv" 13
+;; ssediv in modes SF, DF, V4SF, V2DF with no memory operand: default
+;; latency 13.  After decode, one pipe/divider path (fpu1+fdiv1 or
+;; fpu3+fdiv3) is held for 4 cycles, then its divider for 8 more.
+(define_insn_reservation "c86_4g_m7_avx512_ssediv_x" 13
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (eq_attr "memory" "none"))))
+ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x8")
+
+;; Same modes with memory attribute "load": default latency 20, with
+;; the load reservation inserted before the same pipe/divider sequence.
+(define_insn_reservation "c86_4g_m7_avx512_ssediv_xmem" 20
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF,V4SF,V2DF")
+ (eq_attr "memory" "load"))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x8")
+
+(define_insn_reservation "c86_4g_m7_avx512_ssediv_y" 13
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssediv")
- (and (not (eq_attr "mode" "V16SF,V8DF"))
+ (and (eq_attr "mode" "V8SF,V4DF")
(eq_attr "memory" "none"))))
- "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13")
+ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8")
-(define_insn_reservation "c86_4g_m7_avx512_ssediv_mem" 20
+(define_insn_reservation "c86_4g_m7_avx512_ssediv_ymem" 20
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssediv")
- (and (not (eq_attr "mode" "V16SF,V8DF"))
+ (and (eq_attr "mode" "V8SF,V4DF")
(eq_attr "memory" "load"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13")
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8")
(define_insn_reservation "c86_4g_m7_avx512_ssediv_z" 24
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssediv")
(and (eq_attr "mode" "V16SF,V8DF")
(eq_attr "memory" "none"))))
- "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24")
+ "c86-4g-m7-double,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20")
(define_insn_reservation "c86_4g_m7_avx512_ssediv_zmem" 31
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssediv")
(and (eq_attr "mode" "V16SF,V8DF")
(eq_attr "memory" "load"))))
- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24")
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20")
;; SSECMP
(define_insn_reservation "c86_4g_m7_avx512_ssecmp" 5
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (eq_attr "c86_attr" "other")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_ssecmp_z_load" 12
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "mode" "V16SF,V8DF,XI")
(and (eq_attr "c86_attr" "other")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2")
(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp" 5
(and (eq_attr "cpu" "c86_4g_m7")
(eq_attr "memory" "load"))))))
"c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3")
+;; EVEX-prefixed ssecmp insns in XI mode with c86_attr "other" or
+;; "ptest" and no memory operand: after decode, any FP pipe for one
+;; cycle, then fpu1 or fpu3 for one cycle.
+(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z" 5
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "c86_attr" "other,ptest")
+ (eq_attr "memory" "none"))))))
+ "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3")
+
+;; Same insns with memory attribute "load": the load reservation comes
+;; first, then any FP pipe, then fpu1 or fpu3 held for two cycles.
+;; NOTE(review): the load form holds fpu1/fpu3 for 2 cycles while the
+;; register form above holds it for 1 -- confirm this is intended.
+(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z_load" 12
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "prefix" "evex")
+ (and (eq_attr "mode" "XI")
+ (and (eq_attr "c86_attr" "other,ptest")
+ (eq_attr "memory" "load"))))))
+ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3x2")
+
(define_insn_reservation "c86_4g_m7_avx_ssecmp_vp" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))))
"c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3")
-(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z" 4
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "ssecmp")
- (and (eq_attr "mode" "XI")
- (and (eq_attr "c86_attr" "ptest")
- (eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
-
-(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z_load" 11
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "ssecmp")
- (and (eq_attr "mode" "XI")
- (and (eq_attr "c86_attr" "ptest")
- (eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
-
;; SSECVT
(define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy" 4
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssemuladd")
(and (eq_attr "c86_attr" "other")
- (and (not (eq_attr "isa" "fma,fma4"))
- (eq_attr "mode" "V32HF,V16SF,V8DF,XI")
- (eq_attr "memory" "none")))))
+ (eq_attr "memory" "none"))))
"c86-4g-m7-direct,c86-4g-m7-fpu_0_2")
(define_insn_reservation "c86_4g_m7_avx512_muladd_load" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "ssemuladd")
(and (eq_attr "c86_attr" "other")
- (and (not (eq_attr "isa" "fma,fma4"))
- (eq_attr "memory" "load")))))
+ (eq_attr "memory" "load"))))
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2")
(define_insn_reservation "c86_4g_m7_avx512_muladd_madd" 4
(eq_attr "memory" "load")))))
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2")
-(define_insn_reservation "c86_4g_m7_fma_muladd" 4
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "ssemuladd")
- (and (eq_attr "isa" "fma,fma4")
- (eq_attr "memory" "none"))))
- "c86-4g-m7-direct,c86-4g-m7-fpu_0_1")
-
-(define_insn_reservation "c86_4g_m7_fma_muladd_load" 11
- (and (eq_attr "cpu" "c86_4g_m7")
- (and (eq_attr "type" "ssemuladd")
- (and (eq_attr "isa" "fma,fma4")
- (eq_attr "memory" "load"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1")
-
;; SSE
(define_insn_reservation "c86_4g_m7_avx512_sse_range" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "TI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x2")
(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x_load" 9
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "TI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x2")
(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y" 5
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "OI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x3")
(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y_load" 12
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "OI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x3")
(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z" 8
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "XI")
(eq_attr "memory" "none")))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x6")
(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z_load" 15
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "c86_decode" "vector")
(and (eq_attr "mode" "XI")
(eq_attr "memory" "load")))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x6")
(define_insn_reservation "c86_4g_m7_avx512_sse_class" 4
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "length_immediate" "1")
(and (eq_attr "mode" "V32HF,V16SF,V8DF")
(eq_attr "memory" "none"))))))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_sse_class_z_load" 11
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "length_immediate" "1")
(and (eq_attr "mode" "V32HF,V16SF,V8DF")
(eq_attr "memory" "load"))))))
- "c86-4g-m7-vector,c86-4g-m7-load")
+ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx_sse" 5
(and (eq_attr "cpu" "c86_4g_m7")
(eq_attr "memory" "load")))))
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1")
-(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt" 16
+;; SSE SQRT
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_x" 14
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "sse")
- (and (eq_attr "c86_attr" "sqrt")
- (eq_attr "memory" "none"))))
- "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16")
+ (and (eq_attr "mode" "SF,V4SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x9")
-(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_load" 23
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_xload" 21
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "sse")
- (and (eq_attr "c86_attr" "sqrt")
- (eq_attr "memory" "load"))))
- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16")
+ (and (eq_attr "mode" "SF,V4SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x9")
+
+;; V8SF sqrt, register and load forms: both fpu1/fpu3 pipes and both
+;; dividers are held together for 4 cycles, then both dividers for 9.
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_y" 14
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V8SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9")
+
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_yload" 21
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V8SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9")
+
+;; V16SF sqrt, register and load forms: same shape as V8SF, but both
+;; dividers stay reserved for 22 cycles after the first 4.
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_z" 26
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V16SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22")
+
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_zload" 33
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V16SF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22")
+
+;; DF and V2DF sqrt, register and load forms: one pipe/divider path
+;; (fpu1+fdiv1 or fpu3+fdiv3) for 4 cycles, then its divider for 15.
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_x" 20
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "DF,V2DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x15")
+
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_xload" 27
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "DF,V2DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x15")
+
+;; V4DF sqrt, register and load forms: both pipes and both dividers
+;; for 4 cycles, then both dividers for 15.
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_y" 20
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15")
+
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_yload" 27
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V4DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15")
+
+;; V8DF sqrt, register and load forms: both pipes and both dividers
+;; for 4 cycles, then both dividers for 34.
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_z" 38
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V8DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "none")))))
+ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34")
+
+(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_zload" 45
+ (and (eq_attr "cpu" "c86_4g_m7")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "mode" "V8DF")
+ (and (eq_attr "c86_attr" "sqrt")
+ (eq_attr "memory" "load")))))
+ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34")
;; MSKLOG/MSKMOV
(define_insn_reservation "c86_4g_m7_avx512_msklog" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "msklog")
(eq_attr "c86_decode" "vector")))
- "c86-4g-m7-vector")
+ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_mskmov_reg_k" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "mskmov")
(match_operand:V8DI 0 "register_operand" "v")))
- "c86-4g-m7-vector,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2")
+ "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_k" 1
(and (eq_attr "cpu" "c86_4g_m7")
(and (eq_attr "type" "mskmov")
(and (match_operand 0 "register_operand" "k")
(match_operand 1 "register_operand" "r"))))
- "c86-4g-m7-double,c86-4g-m7-fpu1*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2")
+ "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3")
(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_m" 8
(and (eq_attr "cpu" "c86_4g_m7")
;; HYGON Scheduling
;; Modeling automatons for decoders, integer execution pipes,
;; AGU pipes, floating point execution units, integer and
-;; floating point dividers.
-(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv")
+;; floating point dividers. Split fp1 into its own automaton
+;; to keep this unit independent without increasing the main
+;; c86_4g_fp state space.
+(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp024, c86_4g_fp1, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv")
;; Decoders unit has 4 decoders and all of them can decode fast path
;; and vector type instructions.
(define_cpu_unit "c86-4g-decode2" "c86_4g")
(define_cpu_unit "c86-4g-decode3" "c86_4g")
-;; Two separated dividers for int and fp.
-(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv")
-(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv")
-
;; Currently blocking all decoders for vector path instructions as
;; they are dispatched separately as a microcode sequence.
;; Fix me: Need to revisit this.
;; Fix me: Need to revisit this later to simulate fast path double behavior.
(define_reservation "c86-4g-double" "c86-4g-direct")
-
;; Integer unit 4 ALU pipes.
(define_cpu_unit "c86-4g-ieu0" "c86_4g_ieu")
(define_cpu_unit "c86-4g-ieu1" "c86_4g_ieu")
(define_cpu_unit "c86-4g-ieu3" "c86_4g_ieu")
(define_reservation "c86-4g-ieu" "c86-4g-ieu0|c86-4g-ieu1|c86-4g-ieu2|c86-4g-ieu3")
+;; One separate integer divider.
+(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv")
+
;; 2 AGU pipes in c86_4g
;; According to CPU diagram last AGU unit is used only for stores.
(define_cpu_unit "c86-4g-agu0" "c86_4g_agu")
+c86-4g-agu0+c86-4g-agu1")
;; Floating point unit 4 FP pipes.
-(define_cpu_unit "c86-4g-fp0" "c86_4g_fp")
-(define_cpu_unit "c86-4g-fp1" "c86_4g_fp")
-(define_cpu_unit "c86-4g-fp2" "c86_4g_fp")
-(define_cpu_unit "c86-4g-fp3" "c86_4g_fp")
+;; fp1 is placed in its own automaton (c86_4g_fp1); fp0, fp2 and fp3
+;; share c86_4g_fp024.
+;; NOTE(review): the shared automaton is named "fp024" although the pipes
+;; assigned to it are fp0, fp2 and fp3 -- confirm the name is intended.
+(define_cpu_unit "c86-4g-fp0" "c86_4g_fp024")
+(define_cpu_unit "c86-4g-fp1" "c86_4g_fp1")
+(define_cpu_unit "c86-4g-fp2" "c86_4g_fp024")
+(define_cpu_unit "c86-4g-fp3" "c86_4g_fp024")
(define_reservation "c86-4g-fpu" "c86-4g-fp0|c86-4g-fp1|c86-4g-fp2|c86-4g-fp3")
+c86-4g-fp2+c86-4g-fp3
+c86-4g-agu0+c86-4g-agu1")
+;; One separate FP divider.
+(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv")
+
+;; Reserves fp1 and the FP divider together for 4 consecutive cycles.
+(define_reservation "c86-4g-fp1fdivx4" "(c86-4g-fp1+c86-4g-fdiv)*4")
+
;; Call instruction
(define_insn_reservation "c86_4g_call" 1
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "type" "fpspc")
(eq_attr "c86_attr" "sqrt")))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*22")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*18")
(define_insn_reservation "c86_4g_sse_sqrt_sf" 14
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "memory" "none,unknown")
(and (eq_attr "c86_attr" "sqrt")
(eq_attr "type" "sse")))))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*14")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*10")
(define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "memory" "load")
(and (eq_attr "c86_attr" "sqrt")
(eq_attr "type" "sse")))))
- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*14")
+ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*10")
(define_insn_reservation "c86_4g_sse_sqrt_df" 20
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "memory" "none,unknown")
(and (eq_attr "c86_attr" "sqrt")
(eq_attr "type" "sse")))))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*20")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*16")
(define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "memory" "load")
(and (eq_attr "c86_attr" "sqrt")
(eq_attr "type" "sse")))))
- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*20")
+ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*16")
;; RCP
(define_insn_reservation "c86_4g_sse_rcp" 5
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*15")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*11")
+;; Type "fdiv" insns with a load operand; latency 22.  After decode and
+;; load, fp1 and the divider are held together for 4 cycles
+;; (c86-4g-fp1fdivx4) and the divider alone for 11 more, so the divider
+;; stays reserved for 15 cycles, the same total as the old "fdiv*15".
(define_insn_reservation "c86_4g_fp_op_div_load" 22
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*15")
+ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*11")
+;; Type "fdiv" insns whose source is an integer (fp_int_src) loaded from
+;; memory; latency 26.  After decode and load, fp1 is held alone for 4
+;; cycles, then fp1 and the divider together for 4 (c86-4g-fp1fdivx4),
+;; then the divider alone for 11.
-(define_insn_reservation "c86_4g_fp_op_idiv_load" 27
+(define_insn_reservation "c86_4g_fp_op_idiv_load" 26
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*19")
+ "c86-4g-double,c86-4g-load,c86-4g-fp1*4,c86-4g-fp1fdivx4,c86-4g-fdiv*11")
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
(define_insn_reservation "c86_4g_fp_insn" 1
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*10")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*6")
+;; SSE divide in SF/V4SF mode with a load operand; latency 17.  After
+;; decode and load, fp1 and the divider are held together for 4 cycles
+;; (c86-4g-fp1fdivx4), then the divider alone for 6 more (10 in total).
(define_insn_reservation "c86_4g_ssediv_ss_ps_load" 17
(and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10")
+ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6")
+;; SSE divide in DF/V2DF mode with no memory operand; latency 13.  After
+;; decode, fp1 and the divider are held together for 4 cycles
+;; (c86-4g-fp1fdivx4), then the divider alone for 9 more (13 in total).
(define_insn_reservation "c86_4g_ssediv_sd_pd" 13
(and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(eq_attr "mode" "V2DF,DF"))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*13")
+ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*9")
+;; SSE divide in DF/V2DF mode with a load operand; latency 20.  After
+;; decode and load, fp1 and the divider are held together for 4 cycles
+;; (c86-4g-fp1fdivx4), then the divider alone for 9 more (13 in total).
(define_insn_reservation "c86_4g_ssediv_sd_pd_load" 20
(and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(eq_attr "mode" "V2DF,DF"))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13")
+ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9")
(define_insn_reservation "c86_4g_ssediv_avx256_ps" 10
(and (eq_attr "mode" "V8SF")
(and (eq_attr "memory" "none")
(eq_attr "type" "ssediv"))))
- "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*10")
+ "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*6")
+;; V8SF divide with a load operand; latency 17.  Uses the c86-4g-double
+;; decode reservation, then load, then fp1 and the divider together for 4
+;; cycles (c86-4g-fp1fdivx4), then the divider alone for 6 more.
(define_insn_reservation "c86_4g_ssediv_avx256_ps_load" 17
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load"))))
- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10")
+ "c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6")
+;; V4DF divide with no memory operand; latency 13.  Uses the
+;; c86-4g-double decode reservation, then fp1 and the divider together
+;; for 4 cycles (c86-4g-fp1fdivx4), then the divider alone for 9 more.
(define_insn_reservation "c86_4g_ssediv_avx256_pd" 13
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none"))))
- "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*13")
+ "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*9")
+;; V4DF divide with a load operand; latency 20.  Uses the c86-4g-double
+;; decode reservation, then load, then fp1 and the divider together for 4
+;; cycles (c86-4g-fp1fdivx4), then the divider alone for 9 more.
(define_insn_reservation "c86_4g_ssediv_avx256_pd_load" 20
(and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load"))))
- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13")
+ "c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9")
;; SSE MUL
(define_insn_reservation "c86_4g_ssemul_ss_ps" 3
(and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6")