;; Source: [thirdparty/gcc.git] / gcc / config / aarch64 / thunderx.md

;; Cavium ThunderX pipeline description
;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
;;
;; Written by Andrew Pinski  <apinski@cavium.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; Thunder is a dual-issue processor that can issue all instructions on
;; pipe0 and a subset on pipe1.


;; Four automata are declared.  thunderx_main holds the two issue pipes;
;; the multiplier, the divider and the SIMD unit each get an automaton of
;; their own.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")

;; The two issue pipes, both tracked by thunderx_main.
(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")
;; One unit per secondary automaton.
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")

;; Latency 1 for the ALU, logic, move, adr and extend types listed below.
;; The insn may take either pipe0 or pipe1.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Latency 1 for the shift, rotate, bit-field and rbit/rev types listed
;; below.  The insn may take either pipe0 or pipe1.
;; "extend" is deliberately not listed: thunderx_add already matches it with
;; the same latency and the same reservation, and that reservation comes
;; first in this file.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,bfx,rotate_imm,shift_imm,shift_reg,rbit,rev"))
  "thunderx_pipe0 | thunderx_pipe1")


;; Arithmetic instructions with an extra shift or extend are two cycles.
;; FIXME: This needs more attributes on aarch64 than what is currently there;
;;    this is conservative for now.
;; It is not fully correct: the extra cycle is only for
;; !(LSL && shift by 0/1/2/3) and only for !(zero extend).

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Insns of type "csel": latency 2, issued on either pipe0 or pipe1.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Multiply, multiply-accumulate and count-leading-zeros can only happen on
;; pipe 1.  Latency is 4; the insn also claims the thunderx_mult unit in its
;; issue cycle.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")

;; crcb,crch,crcw are 4 cycles and can only happen on pipe 1.
;; This reservation matches every insn of type "crc".  The 5-cycle crcx case
;; is not modelled separately, because the thunderx_crc64 reservation that
;; follows is commented out.

(define_insn_reservation "thunderx_crc32" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crc"))
  "thunderx_pipe1 + thunderx_mult")

;; crcx is 5 cycles and can only happen on pipe 1.  The reservation below is
;; disabled (commented out).
;(define_insn_reservation "thunderx_crc64" 5
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "crc")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_mult")

;; Integer divide: latency 22.  Issues on pipe1 together with the divider,
;; then keeps thunderx_divide busy for 21 more cycles (22 in total).
;; NOTE(review): despite the "div32" name this matches every udiv/sdiv insn;
;; the separate thunderx_div64 reservation is commented out.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")

;(define_insn_reservation "thunderx_div64" 38
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "udiv,sdiv")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")

;; Stores take one cycle in pipe 0.  Only type "store_4" is matched here;
;; store_8 and store_16 are handled by thunderx_storepair.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_4"))
  "thunderx_pipe0")

;; Store pairs are single issued: the reservation takes pipe0 and pipe1 in
;; the same cycle, so no other insn can issue alongside.  Latency is 1.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_8,store_16"))
  "thunderx_pipe0 + thunderx_pipe1")

;; Prefetch are single issued
;(define_insn_reservation "thunderx_prefetch" 1
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "prefetch"))
;  "thunderx_pipe0 + thunderx_pipe1")

;; Loads (and load pairs) from L1 take 3 cycles in pipe 0.
;; Matches types load_4, load_8 and load_16; pipe1 is left free.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load_4, load_8, load_16"))
  "thunderx_pipe0")

;; Insns of type branch, trap and call: latency 1, pipe1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")

;; FPU

;; Insns of type faddd/fadds: latency 4, pipe1 only.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")

;; Insns of type fconsts/fconstd: latency 1, pipe1 only.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")

;; Moves between fp registers are 2 cycles; min/max (f_minmaxs, f_minmaxd)
;; are included in the same class.  Issued on pipe1 only.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
  "thunderx_pipe1")

;; ABS and NEG are 1 cycle (types ffariths, ffarithd).  Issued on pipe1 only.
(define_insn_reservation "thunderx_fabs" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "ffariths,ffarithd"))
  "thunderx_pipe1")

;; Insns of type fcsel: latency 3, pipe1 only.
(define_insn_reservation "thunderx_fcsel" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcsel"))
  "thunderx_pipe1")

;; Insns of type f_mrc/f_mcr: latency 2, pipe1 only.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")

;; FP compare and conditional compare (fcmps, fcmpd, fccmps, fccmpd):
;; latency 3, pipe1 only.
(define_insn_reservation "thunderx_fcmp" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx_pipe1")

;; Insns of type fmacs/fmacd/fmuls/fmuld: latency 6, pipe1 only.
;; Unlike the integer multiply, this does not reserve thunderx_mult.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")

;; Insns of type fdivs: latency 12.  Issues on pipe1 together with the
;; divider, then holds thunderx_divide for 8 more cycles (9 in total), so
;; the divider is released before the result latency has elapsed.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")

;; Insns of type fdivd: latency 22.  Issues on pipe1 together with the
;; divider, then holds thunderx_divide for 18 more cycles (19 in total).
(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")

;; Insns of type fsqrts: latency 17.  They share the thunderx_divide unit
;; with the divides: issue on pipe1 with the divider, then hold
;; thunderx_divide for 13 more cycles (14 in total).
(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")

;; Insns of type fsqrtd: latency 31.  Issues on pipe1 together with the
;; divider, then holds thunderx_divide for 27 more cycles (28 in total).
(define_insn_reservation "thunderx_fsqrtd" 31
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")

;; The rounding conversions inside fp (types f_cvt, f_rints, f_rintd) are
;; 4 cycles.  Issued on pipe1 only.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
  "thunderx_pipe1")

;; Float<->integer conversions that also move between the int and fp
;; register files (types f_cvtf2i, f_cvti2f) are 6 cycles.  Pipe1 only.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")

;; FP/SIMD loads and stores happen in pipe 0.
;; 64bit loads of a register or a register pair are 4 cycles from L1.
;; NOTE(review): the list also contains neon_load1_1reg_q, which by its name
;; is a q-form load -- confirm it is meant to share this reservation.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
			neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")

;; A 128bit load pair is single issued (it takes pipe0 and pipe1 in the same
;; cycle) and is 4 cycles from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")

;; FP/SIMD stores take one cycle in pipe 0.
;; ST1 with one register, either multiple structures or a single structure,
;;    is also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
			neon_store1_one_lane, neon_store1_one_lane_q"))
  "thunderx_pipe0")

;; 64bit neon store pairs are single issued for one cycle: pipe0 and pipe1
;; are both taken in the issue cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")

;; 128bit neon store pairs are single issued for two cycles: pipe0 and pipe1
;; are both held for two consecutive cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")

;; LD1R/LD1 (with a single struct) takes 6 cycles and is issued in pipe0.
;; Only type neon_load1_all_lanes is matched here.
(define_insn_reservation "thunderx_neon_ld1" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_all_lanes"))
  "thunderx_pipe0")

;; SIMD/NEON: the SIMD unit on ThunderX is 64 bits wide, so q forms take an
;; extra cycle.

;; ThunderX simd move instruction types - 2/3 cycles
;; ThunderX dup, ins is the same
;; ThunderX SIMD fabs/fneg instruction types
;; This is the non-q class: latency 2, issued on pipe1 together with the
;; SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
			neon_fp_compare_d, neon_move, neon_dup, \
			neon_ins, neon_from_gp, neon_to_gp, \
			neon_abs, neon_neg, \
			neon_fp_neg_s, neon_fp_abs_s"))
  "thunderx_pipe1 + thunderx_simd")

;; q-form SIMD move class: latency 3.  Issues on pipe1 together with the
;; SIMD unit, then holds thunderx_simd for one further cycle.
(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
			neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
			neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
			neon_abs_q, neon_neg_q, \
			neon_fp_neg_s_q, neon_fp_neg_d_q, \
			neon_fp_abs_s_q, neon_fp_abs_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; ThunderX simd simple/add instruction types - 4/5 cycles

;; Non-q SIMD simple/add class: latency 4, issued on pipe1 together with the
;; SIMD unit for one cycle.  Each type is listed exactly once.
;; NOTE(review): thunderx_neon_add_q lists both neon_fp_reduc_minmax_s_q and
;; neon_fp_reduc_minmax_d_q, while only neon_fp_reduc_minmax_s appears here
;; -- confirm that neon_fp_reduc_minmax_d is left out on purpose.
(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
			neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
			neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
			neon_fp_reduc_minmax_s"))
  "thunderx_pipe1 + thunderx_simd")

;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form (which is
;; incorrect), so the plain types are listed here alongside the q forms.

;; q-form SIMD simple/add class: latency 5.  Issues on pipe1 together with
;; the SIMD unit, then holds thunderx_simd for one further cycle.  Each type
;; is listed exactly once.
(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
			neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
			neon_arith_acc_q, neon_rev_q, \
			neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; Multiplies (float and integer), shifts, permutes (except for TBL) and
;; float conversions are 6/7 cycles.
;; This is the non-q class: latency 6, issued on pipe1 together with the
;; SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_mult" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
			neon_mla_b, neon_mla_h, neon_mla_s, \
			neon_mla_h_scalar, neon_mla_s_scalar, \
			neon_ext, neon_shift_imm, neon_permute, \
			neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
			neon_sat_shift_reg, neon_shift_acc, \
			neon_mul_b, neon_mul_h, neon_mul_s, \
			neon_mul_h_scalar, neon_mul_s_scalar, \
			neon_fp_mul_s_scalar, \
			neon_fp_mla_s_scalar"))
  "thunderx_pipe1 + thunderx_simd")

;; q-form SIMD multiply/shift/permute/convert class: latency 7.  Issues on
;; pipe1 together with the SIMD unit, then holds thunderx_simd for one
;; further cycle.  Besides the _q types, the list also carries the long,
;; narrow and widen types (neon_shift_imm_long, neon_mul_*_long,
;; neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q).
(define_insn_reservation "thunderx_neon_mult_q" 7
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
			neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
			neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
			neon_ext_q, neon_shift_imm_q, neon_permute_q, \
			neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
			neon_sat_shift_reg_q, neon_shift_acc_q, \
			neon_shift_imm_long, \
			neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
			neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
			neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
			neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
			neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
			neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; AES[ED] is 5 cycles (type crypto_aese).  Issues on pipe1 together with
;; the SIMD unit, then holds thunderx_simd for one further cycle.
(define_insn_reservation "thunderx_crypto_aese" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aese"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; AES{,I}MC is 3 cycles (type crypto_aesmc).  Issues on pipe1 together with
;; the SIMD unit, then holds thunderx_simd for one further cycle.
(define_insn_reservation "thunderx_crypto_aesmc" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aesmc"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder 128bit SIMD reads the upper half in cycle 2 and writes the upper
;; half in the last cycle.  Each bypass below therefore gives a q-form
;; producer feeding a q-form consumer a latency one cycle lower than the
;; producer's default (3 -> 2, 5 -> 4, 7 -> 6).
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")

;; 64bit TBL is emulated and takes 160 cycles.
;; The reservation holds both pipes for all 160 cycles, so nothing else can
;; issue while it is in flight.
(define_insn_reservation "thunderx_tbl" 160
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1"))
  "(thunderx_pipe1+thunderx_pipe0)*160")

;; 128bit TBL is emulated and takes 320 cycles.
;; The reservation holds both pipes for all 320 cycles, so nothing else can
;; issue while it is in flight.
(define_insn_reservation "thunderx_tblq" 320
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1_q"))
  "(thunderx_pipe1+thunderx_pipe0)*320")

;; Assume both pipes are needed for unknown and multiple-instruction
;; patterns (types untyped and multiple).  Latency 1; pipe0 and pipe1 are
;; both taken in the issue cycle.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")
Commit	Line	Data
2d41ed58	1	;; Cavium ThunderX pipeline description
8d9254fc	2	;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
2d41ed58 AP	3	;;
	4	;; Written by Andrew Pinski <apinski@cavium.com>
	5
	6	;; This file is part of GCC.
	7
	8	;; GCC is free software; you can redistribute it and/or modify
	9	;; it under the terms of the GNU General Public License as published by
	10	;; the Free Software Foundation; either version 3, or (at your option)
	11	;; any later version.
	12
	13	;; GCC is distributed in the hope that it will be useful,
	14	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	;; GNU General Public License for more details.
	17
	18	;; You should have received a copy of the GNU General Public License
	19	;; along with GCC; see the file COPYING3. If not see
	20	;; <http://www.gnu.org/licenses/>.
2d41ed58 AP	21
	22
	23	;; Thunder is a dual-issue processor that can issue all instructions on
	24	;; pipe0 and a subset on pipe1.
	25
	26
	27	(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
	28
	29	(define_cpu_unit "thunderx_pipe0" "thunderx_main")
	30	(define_cpu_unit "thunderx_pipe1" "thunderx_main")
	31	(define_cpu_unit "thunderx_mult" "thunderx_mult")
	32	(define_cpu_unit "thunderx_divide" "thunderx_divide")
	33	(define_cpu_unit "thunderx_simd" "thunderx_simd")
	34
	35	(define_insn_reservation "thunderx_add" 1
	36	(and (eq_attr "tune" "thunderx")
	37	(eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
	38	"thunderx_pipe0 \| thunderx_pipe1")
	39
	40	(define_insn_reservation "thunderx_shift" 1
	41	(and (eq_attr "tune" "thunderx")
94f7a25e	42	(eq_attr "type" "bfm,bfx,extend,rotate_imm,shift_imm,shift_reg,rbit,rev"))
2d41ed58 AP	43	"thunderx_pipe0 \| thunderx_pipe1")
	44
	45
	46	;; Arthimentic instructions with an extra shift or extend is two cycles.
	47	;; FIXME: This needs more attributes on aarch64 than what is currently there;
	48	;; this is conserative for now.
	49	;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
	50	;; Except this is not correct as this is only for !(zero extend)
	51
	52	(define_insn_reservation "thunderx_arith_shift" 2
	53	(and (eq_attr "tune" "thunderx")
	54	(eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
	55	"thunderx_pipe0 \| thunderx_pipe1")
	56
	57	(define_insn_reservation "thunderx_csel" 2
	58	(and (eq_attr "tune" "thunderx")
	59	(eq_attr "type" "csel"))
	60	"thunderx_pipe0 \| thunderx_pipe1")
	61
	62	;; Multiply and mulitply accumulate and count leading zeros can only happen on pipe 1
	63
	64	(define_insn_reservation "thunderx_mul" 4
	65	(and (eq_attr "tune" "thunderx")
	66	(eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
	67	"thunderx_pipe1 + thunderx_mult")
	68
ed9fa8d2	69	;; crcb,crch,crcw is 4 cycles and can only happen on pipe 1
2d41ed58	70
ed9fa8d2 AP	71	(define_insn_reservation "thunderx_crc32" 4
	72	(and (eq_attr "tune" "thunderx")
	73	(eq_attr "type" "crc"))
	74	"thunderx_pipe1 + thunderx_mult")
	75
	76	;; crcx is 5 cycles and only happen on pipe 1
	77	;(define_insn_reservation "thunderx_crc64" 5
2d41ed58	78	; (and (eq_attr "tune" "thunderx")
ed9fa8d2 AP	79	; (eq_attr "type" "crc")
ed9fa8d2 AP	80	; (eq_attr "mode" "DI"))
2d41ed58 AP	81	; "thunderx_pipe1 + thunderx_mult")
	82
	83	(define_insn_reservation "thunderx_div32" 22
	84	(and (eq_attr "tune" "thunderx")
	85	(eq_attr "type" "udiv,sdiv"))
	86	"thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
	87
	88	;(define_insn_reservation "thunderx_div64" 38
	89	; (and (eq_attr "tune" "thunderx")
	90	; (eq_attr "type" "udiv,sdiv")
	91	; (eq_attr "mode" "DI"))
	92	; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
	93
	94	;; Stores take one cycle in pipe 0
	95	(define_insn_reservation "thunderx_store" 1
	96	(and (eq_attr "tune" "thunderx")
89b2133e	97	(eq_attr "type" "store_4"))
2d41ed58 AP	98	"thunderx_pipe0")
	99
	100	;; Store pair are single issued
	101	(define_insn_reservation "thunderx_storepair" 1
	102	(and (eq_attr "tune" "thunderx")
db46a2e6	103	(eq_attr "type" "store_8,store_16"))
2d41ed58 AP	104	"thunderx_pipe0 + thunderx_pipe1")
2d41ed58 AP	105
ed9fa8d2 AP	106	;; Prefetch are single issued
	107	;(define_insn_reservation "thunderx_prefetch" 1
	108	; (and (eq_attr "tune" "thunderx")
	109	; (eq_attr "type" "prefetch"))
	110	; "thunderx_pipe0 + thunderx_pipe1")
2d41ed58 AP	111
	112	;; loads (and load pairs) from L1 take 3 cycles in pipe 0
	113	(define_insn_reservation "thunderx_load" 3
	114	(and (eq_attr "tune" "thunderx")
db46a2e6	115	(eq_attr "type" "load_4, load_8, load_16"))
2d41ed58 AP	116	"thunderx_pipe0")
	117
	118	(define_insn_reservation "thunderx_brj" 1
	119	(and (eq_attr "tune" "thunderx")
	120	(eq_attr "type" "branch,trap,call"))
	121	"thunderx_pipe1")
	122
	123	;; FPU
	124
	125	(define_insn_reservation "thunderx_fadd" 4
	126	(and (eq_attr "tune" "thunderx")
	127	(eq_attr "type" "faddd,fadds"))
	128	"thunderx_pipe1")
	129
	130	(define_insn_reservation "thunderx_fconst" 1
	131	(and (eq_attr "tune" "thunderx")
	132	(eq_attr "type" "fconsts,fconstd"))
	133	"thunderx_pipe1")
	134
ed9fa8d2	135	;; Moves between fp are 2 cycles including min/max
2d41ed58 AP	136	(define_insn_reservation "thunderx_fmov" 2
2d41ed58 AP	137	(and (eq_attr "tune" "thunderx")
ed9fa8d2 AP	138	(eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
	139	"thunderx_pipe1")
	140
	141	;; ABS, and NEG are 1 cycle
	142	(define_insn_reservation "thunderx_fabs" 1
	143	(and (eq_attr "tune" "thunderx")
	144	(eq_attr "type" "ffariths,ffarithd"))
	145	"thunderx_pipe1")
	146
	147	(define_insn_reservation "thunderx_fcsel" 3
	148	(and (eq_attr "tune" "thunderx")
	149	(eq_attr "type" "fcsel"))
2d41ed58 AP	150	"thunderx_pipe1")
	151
	152	(define_insn_reservation "thunderx_fmovgpr" 2
	153	(and (eq_attr "tune" "thunderx")
	154	(eq_attr "type" "f_mrc, f_mcr"))
	155	"thunderx_pipe1")
	156
ed9fa8d2 AP	157	(define_insn_reservation "thunderx_fcmp" 3
ed9fa8d2 AP	158	(and (eq_attr "tune" "thunderx")
c297d256	159	(eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
ed9fa8d2 AP	160	"thunderx_pipe1")
ed9fa8d2 AP	161
2d41ed58 AP	162	(define_insn_reservation "thunderx_fmul" 6
	163	(and (eq_attr "tune" "thunderx")
	164	(eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
	165	"thunderx_pipe1")
	166
	167	(define_insn_reservation "thunderx_fdivs" 12
	168	(and (eq_attr "tune" "thunderx")
	169	(eq_attr "type" "fdivs"))
	170	"thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
	171
	172	(define_insn_reservation "thunderx_fdivd" 22
	173	(and (eq_attr "tune" "thunderx")
	174	(eq_attr "type" "fdivd"))
	175	"thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
	176
	177	(define_insn_reservation "thunderx_fsqrts" 17
	178	(and (eq_attr "tune" "thunderx")
	179	(eq_attr "type" "fsqrts"))
	180	"thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
	181
ed9fa8d2	182	(define_insn_reservation "thunderx_fsqrtd" 31
2d41ed58 AP	183	(and (eq_attr "tune" "thunderx")
2d41ed58 AP	184	(eq_attr "type" "fsqrtd"))
ed9fa8d2	185	"thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
2d41ed58 AP	186
	187	;; The rounding conversion inside fp is 4 cycles
	188	(define_insn_reservation "thunderx_frint" 4
	189	(and (eq_attr "tune" "thunderx")
ed9fa8d2	190	(eq_attr "type" "f_cvt,f_rints,f_rintd"))
2d41ed58 AP	191	"thunderx_pipe1")
	192
	193	;; Float to integer with a move from int to/from float is 6 cycles
	194	(define_insn_reservation "thunderx_f_cvt" 6
	195	(and (eq_attr "tune" "thunderx")
ed9fa8d2	196	(eq_attr "type" "f_cvtf2i,f_cvti2f"))
2d41ed58 AP	197	"thunderx_pipe1")
	198
	199	;; FP/SIMD load/stores happen in pipe 0
	200	;; 64bit Loads register/pairs are 4 cycles from L1
	201	(define_insn_reservation "thunderx_64simd_fp_load" 4
	202	(and (eq_attr "tune" "thunderx")
	203	(eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
	204	neon_load1_1reg_q,neon_load1_2reg"))
	205	"thunderx_pipe0")
	206
	207	;; 128bit load pair is singled issue and 4 cycles from L1
	208	(define_insn_reservation "thunderx_128simd_pair_load" 4
	209	(and (eq_attr "tune" "thunderx")
	210	(eq_attr "type" "neon_load1_2reg_q"))
	211	"thunderx_pipe0+thunderx_pipe1")
	212
	213	;; FP/SIMD Stores takes one cycle in pipe 0
ed9fa8d2 AP	214	;; ST1 with one registers either multiple structures or single structure is
ed9fa8d2 AP	215	;; also one cycle.
2d41ed58 AP	216	(define_insn_reservation "thunderx_simd_fp_store" 1
2d41ed58 AP	217	(and (eq_attr "tune" "thunderx")
ed9fa8d2 AP	218	(eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
ed9fa8d2 AP	219	neon_store1_one_lane, neon_store1_one_lane_q"))
2d41ed58 AP	220	"thunderx_pipe0")
	221
	222	;; 64bit neon store pairs are single issue for one cycle
	223	(define_insn_reservation "thunderx_64neon_storepair" 1
	224	(and (eq_attr "tune" "thunderx")
	225	(eq_attr "type" "neon_store1_2reg"))
	226	"thunderx_pipe0 + thunderx_pipe1")
	227
	228	;; 128bit neon store pair are single issued for two cycles
	229	(define_insn_reservation "thunderx_128neon_storepair" 2
	230	(and (eq_attr "tune" "thunderx")
	231	(eq_attr "type" "neon_store1_2reg_q"))
	232	"(thunderx_pipe0 + thunderx_pipe1)*2")
	233
ed9fa8d2 AP	234	;; LD1R/LD1 (with a single struct) takes 6 cycles and issued in pipe0
	235	(define_insn_reservation "thunderx_neon_ld1" 6
	236	(and (eq_attr "tune" "thunderx")
	237	(eq_attr "type" "neon_load1_all_lanes"))
	238	"thunderx_pipe0")
2d41ed58 AP	239
2d41ed58 AP	240	;; SIMD/NEON (q forms take an extra cycle)
ed9fa8d2	241	;; SIMD For ThunderX is 64bit wide,
2d41ed58	242
ed9fa8d2 AP	243	;; ThunderX simd move instruction types - 2/3 cycles
	244	;; ThunderX dup, ins is the same
	245	;; ThunderX SIMD fabs/fneg instruction types
2d41ed58 AP	246	(define_insn_reservation "thunderx_neon_move" 2
	247	(and (eq_attr "tune" "thunderx")
	248	(eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
ed9fa8d2 AP	249	neon_fp_compare_d, neon_move, neon_dup, \
	250	neon_ins, neon_from_gp, neon_to_gp, \
	251	neon_abs, neon_neg, \
	252	neon_fp_neg_s, neon_fp_abs_s"))
2d41ed58 AP	253	"thunderx_pipe1 + thunderx_simd")
	254
	255	(define_insn_reservation "thunderx_neon_move_q" 3
	256	(and (eq_attr "tune" "thunderx")
	257	(eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
ed9fa8d2 AP	258	neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
	259	neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
	260	neon_abs_q, neon_neg_q, \
	261	neon_fp_neg_s_q, neon_fp_neg_d_q, \
	262	neon_fp_abs_s_q, neon_fp_abs_d_q"))
2d41ed58 AP	263	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
2d41ed58 AP	264
ed9fa8d2	265	;; ThunderX simd simple/add instruction types - 4/5 cycles
2d41ed58 AP	266
	267	(define_insn_reservation "thunderx_neon_add" 4
	268	(and (eq_attr "tune" "thunderx")
	269	(eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
	270	neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
	271	neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
	272	neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
	273	neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
ed9fa8d2 AP	274	neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
	275	neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
	276	neon_fp_reduc_minmax_s"))
2d41ed58 AP	277	"thunderx_pipe1 + thunderx_simd")
	278
	279	;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
	280
	281	(define_insn_reservation "thunderx_neon_add_q" 5
	282	(and (eq_attr "tune" "thunderx")
	283	(eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
	284	neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
	285	neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
	286	neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
	287	neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
	288	neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
ed9fa8d2 AP	289	neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
	290	neon_arith_acc_q, neon_rev_q, \
	291	neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
2d41ed58 AP	292	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
2d41ed58 AP	293
ed9fa8d2 AP	294	;; Multiplies (float and integer) and shifts and permutes (except for TBL) and float conversions
	295	;; are 6/7 cycles
	296	(define_insn_reservation "thunderx_neon_mult" 6
	297	(and (eq_attr "tune" "thunderx")
	298	(eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
	299	neon_mla_b, neon_mla_h, neon_mla_s, \
	300	neon_mla_h_scalar, neon_mla_s_scalar, \
	301	neon_ext, neon_shift_imm, neon_permute, \
	302	neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
	303	neon_sat_shift_reg, neon_shift_acc, \
	304	neon_mul_b, neon_mul_h, neon_mul_s, \
	305	neon_mul_h_scalar, neon_mul_s_scalar, \
	306	neon_fp_mul_s_scalar, \
	307	neon_fp_mla_s_scalar"))
	308	"thunderx_pipe1 + thunderx_simd")
	309
	310	(define_insn_reservation "thunderx_neon_mult_q" 7
	311	(and (eq_attr "tune" "thunderx")
	312	(eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
	313	neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
	314	neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
	315	neon_ext_q, neon_shift_imm_q, neon_permute_q, \
	316	neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
	317	neon_sat_shift_reg_q, neon_shift_acc_q, \
	318	neon_shift_imm_long, \
	319	neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
	320	neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
	321	neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
	322	neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
	323	neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
	324	neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
	325	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
	326
	327
	328	;; AES[ED] is 5 cycles
	329	(define_insn_reservation "thunderx_crypto_aese" 5
	330	(and (eq_attr "tune" "thunderx")
	331	(eq_attr "type" "crypto_aese"))
	332	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
2d41ed58	333
ed9fa8d2 AP	334	;; AES{,I}MC is 3 cycles
	335	(define_insn_reservation "thunderx_crypto_aesmc" 3
	336	(and (eq_attr "tune" "thunderx")
	337	(eq_attr "type" "crypto_aesmc"))
	338	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
	339
	340
	341	;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes upper halve in the last cycle
	342	(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
	343	(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
	344	(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
	345
	346	;; 64bit TBL is emulated and takes 160 cycles
	347	(define_insn_reservation "thunderx_tbl" 160
	348	(and (eq_attr "tune" "thunderx")
	349	(eq_attr "type" "neon_tbl1"))
	350	"(thunderx_pipe1+thunderx_pipe0)*160")
	351
	352	;; 128bit TBL is emulated and takes 320 cycles
	353	(define_insn_reservation "thunderx_tblq" 320
	354	(and (eq_attr "tune" "thunderx")
	355	(eq_attr "type" "neon_tbl1_q"))
	356	"(thunderx_pipe1+thunderx_pipe0)*320")
2d41ed58 AP	357
	358	;; Assume both pipes are needed for unknown and multiple-instruction
	359	;; patterns.
	360
	361	(define_insn_reservation "thunderx_unknown" 1
	362	(and (eq_attr "tune" "thunderx")
	363	(eq_attr "type" "untyped,multiple"))
	364	"thunderx_pipe0 + thunderx_pipe1")
	365
	366