-(define_automaton "sifive_7")
+(define_automaton "sifive_7,sifive_7_vec,sifive_7_vec_mem")
;; Sifive 7 Series Base Core
;; This has two pipelines, A (Address) and B (Branch).
(define_cpu_unit "sifive_7_idiv" "sifive_7")
(define_cpu_unit "sifive_7_fpu" "sifive_7")
+;; Vector command queue
+(define_cpu_unit "sifive_7_vcq" "sifive_7")
+;; Vector arithmetic sequencer
+(define_cpu_unit "sifive_7_va" "sifive_7_vec")
+;; Vector store sequencer
+(define_cpu_unit "sifive_7_vs" "sifive_7_vec_mem")
+;; Vector load sequencer
+(define_cpu_unit "sifive_7_vl" "sifive_7_vec_mem")
(define_insn_reservation "sifive_7_load" 3
(and (eq_attr "tune" "sifive_7")
(define_bypass 2 "sifive_7_fp_other"
"sifive_7_store" "riscv_store_data_bypass_p")
+
+;; Vector pipeline
+;; The latency depends on LMUL, but we don't model that yet: we don't want
+;; to expand the rules until a finer-grained model is shown to give a
+;; meaningful performance difference.
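+;;
+;; A reservation string reads left to right, advancing one cycle at each
+;; ",", with "*N" repeating a unit for N cycles.  For example,
+;; "sifive_7_vcq,sifive_7_va*3" holds the vector command queue for one
+;; cycle, then the vector arithmetic sequencer for the next three cycles.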
+
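+;; vsetvl and VL/VLENB reads are scalar-side operations; they issue to the
+;; base core's A-pipe.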
+(define_insn_reservation "sifive_7_vsetvl" 2
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsetvl_pre,vsetvl,rdvlenb,rdvl"))
+ "sifive_7_A")
+
+(define_insn_reservation "sifive_7_vec_load" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr,
+ vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff"))
+ "sifive_7_vcq,sifive_7_vl*3")
+
+(define_insn_reservation "sifive_7_vec_store" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr,
+ vssegte,vssegts,vssegtux,vssegtox"))
+ "sifive_7_vcq,sifive_7_vs*3")
+
+(define_insn_reservation "sifive_7_vec_ialu" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vimovxv,vmov,vimovvx,vialu,vicalu,vext,
+ vshift,viminmax,vimerge,vbrev,vrev8,
+ vimov,vext,vbrev8,vclz,vctz,vcpop,vrol,vror,vandn"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_slow_ialu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vshift,vimul,vimuladd"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_cmp" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vicmp"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_iwalu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "viwalu,viwmul,viwmuladd,vnshift,vwsll"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_div" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vidiv,vfdiv"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_fixed_point" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsalu,vaalu,vsmul,vsshift"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_narrow_fixed_point" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vnclip"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_fsimple" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfmovvf,vfmovfv,vfclass"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_falu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfalu,vfmul,vfmuladd,vfrecp,
+ vfcvtitof,vfcvtftoi,vfmerge,vfmov,vfsgnj"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_fcmp" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfcmp,vfminmax"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_fsqrt_fdiv" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfsqrt,vfdiv"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_fwalu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfwalu,vfwmul,vfwmuladd,vfwmaccbf16,vfwcvtitof,
+ vfwcvtftoi,vfwcvtftof,vfwcvtbf16,
+ vfncvtitof,vfncvtftoi,vfncvtftof,vfncvtbf16,
+ sf_vfnrclip,sf_vqmacc"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_red" 12
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vired,vfredu,vfredo,viwred,vfwredu,vfwredo"))
+ "sifive_7_vcq,sifive_7_va*11")
+
+(define_insn_reservation "sifive_7_vec_mask" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_mask_special" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vmiota,vmidx"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_gather" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vgather"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_compress" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vcompress"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_slide" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+;; Assume a vector instruction is slow if its type is unknown.
+(define_insn_reservation "sifive_7_vec_unknown" 16
+  (and (eq_attr "tune" "sifive_7")
+       (eq_attr "type" "vector,vclmul,vclmulh,vghsh,vgmul,\
+                        vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,\
+                        vaesz,vsha2ms,vsha2ch,vsha2cl,\
+                        vsm4k,vsm4r,vsm3me,vsm3c,sf_vc,sf_vc_se"))
+  "sifive_7_vcq,sifive_7_va*15")