]>
Commit | Line | Data |
---|---|---|
2d41ed58 | 1 | ;; Cavium ThunderX pipeline description |
8d9254fc | 2 | ;; Copyright (C) 2014-2020 Free Software Foundation, Inc. |
2d41ed58 AP |
3 | ;; |
4 | ;; Written by Andrew Pinski <apinski@cavium.com> | |
5 | ||
6 | ;; This file is part of GCC. | |
7 | ||
8 | ;; GCC is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published by | |
10 | ;; the Free Software Foundation; either version 3, or (at your option) | |
11 | ;; any later version. | |
12 | ||
13 | ;; GCC is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;; GNU General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GCC; see the file COPYING3. If not see | |
20 | ;; <http://www.gnu.org/licenses/>. | |
2d41ed58 AP |
21 | |
22 | ||
23 | ;; Thunder is a dual-issue processor that can issue all instructions on | |
24 | ;; pipe0 and a subset on pipe1. | |
25 | ||
26 | ||
;; Automata used by this model: one for the two issue pipes and one each
;; for the multiplier, the divider and the SIMD unit.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
28 | ||
;; The two issue pipes share the thunderx_main automaton.
(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")
;; Each remaining functional unit lives in the automaton of the same name.
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")
34 | ||
;; Simple ALU, logic, move, ADR and extend types: latency 1, issued on
;; either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")
39 | ||
;; Bitfield operations, rotates, shifts and bit/byte reversal: latency 1,
;; issued on either pipe.  The "extend" type is not repeated here because
;; thunderx_add already lists it with the same latency and reservation.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,bfx,rotate_imm,shift_imm,shift_reg,rbit,rev"))
  "thunderx_pipe0 | thunderx_pipe1")
44 | ||
45 | ||
;; Arithmetic instructions with an extra shift or extend take two cycles.
;; FIXME: This needs more attributes on aarch64 than are currently there;
;; this is conservative for now.
;; It is not exact: the two-cycle latency is only for !(LSL && shift by 0/1/2/3)
;; and only for !(zero extend).

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")
56 | ||
;; Conditional select: latency 2, issued on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")
61 | ||
;; Multiply, multiply-accumulate and count-leading-zeros can only happen on
;; pipe 1; they also occupy the multiplier for the issue cycle.  Latency 4.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")
68 | ||
;; crcb, crch and crcw take 4 cycles and can only happen on pipe 1; the
;; reservation also claims the multiplier unit in the issue cycle.

(define_insn_reservation "thunderx_crc32" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crc"))
  "thunderx_pipe1 + thunderx_mult")
75 | ||
;; crcx is 5 cycles and can only happen on pipe 1
77 | ;(define_insn_reservation "thunderx_crc64" 5 | |
2d41ed58 | 78 | ; (and (eq_attr "tune" "thunderx") |
ed9fa8d2 AP |
79 | ; (eq_attr "type" "crc") |
80 | ; (eq_attr "mode" "DI")) | |
2d41ed58 AP |
81 | ; "thunderx_pipe1 + thunderx_mult") |
82 | ||
;; Integer divide: latency 22.  Issues on pipe 1 together with the divider,
;; then keeps the divider busy for a further 21 cycles.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
87 | ||
88 | ;(define_insn_reservation "thunderx_div64" 38 | |
89 | ; (and (eq_attr "tune" "thunderx") | |
90 | ; (eq_attr "type" "udiv,sdiv") | |
91 | ; (eq_attr "mode" "DI")) | |
92 | ; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34") | |
93 | ||
;; Stores of type store_4 take one cycle and issue in pipe 0 only.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_4"))
  "thunderx_pipe0")
99 | ||
;; Store pairs (store_8, store_16) are single issued: they claim both
;; pipes for one cycle.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_8,store_16"))
  "thunderx_pipe0 + thunderx_pipe1")
105 | ||
ed9fa8d2 AP |
;; Prefetches are single issued
107 | ;(define_insn_reservation "thunderx_prefetch" 1 | |
108 | ; (and (eq_attr "tune" "thunderx") | |
109 | ; (eq_attr "type" "prefetch")) | |
110 | ; "thunderx_pipe0 + thunderx_pipe1") | |
2d41ed58 AP |
111 | |
;; Loads (and load pairs) from L1 take 3 cycles and issue in pipe 0.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load_4, load_8, load_16"))
  "thunderx_pipe0")
117 | ||
;; Branches, traps and calls: latency 1, pipe 1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")
122 | ||
123 | ;; FPU | |
124 | ||
;; Scalar FP add types (faddd, fadds): latency 4, pipe 1 only.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")
129 | ||
;; FP constant types (fconsts, fconstd): latency 1, pipe 1 only.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")
134 | ||
;; Moves between FP registers take 2 cycles; min/max are modelled the
;; same way.  Pipe 1 only.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
  "thunderx_pipe1")
140 | ||
;; ABS and NEG are 1 cycle, pipe 1 only.
(define_insn_reservation "thunderx_fabs" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "ffariths,ffarithd"))
  "thunderx_pipe1")
146 | ||
;; FP conditional select: latency 3, pipe 1 only.
(define_insn_reservation "thunderx_fcsel" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcsel"))
  "thunderx_pipe1")
151 | ||
;; The f_mrc and f_mcr move types: latency 2, pipe 1 only.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")
156 | ||
ed9fa8d2 AP |
;; FP compares and conditional compares: latency 3, pipe 1 only.
(define_insn_reservation "thunderx_fcmp" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx_pipe1")
161 | ||
2d41ed58 AP |
;; Scalar FP multiply and multiply-accumulate: latency 6, pipe 1 only.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")
166 | ||
;; fdivs: latency 12.  Issues on pipe 1 together with the divider, then
;; holds the divider for 8 more cycles.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
171 | ||
;; fdivd: latency 22.  Issues on pipe 1 together with the divider, then
;; holds the divider for 18 more cycles.
(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
176 | ||
;; fsqrts: latency 17.  Issues on pipe 1 together with the divider, then
;; holds the divider for 13 more cycles.
(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
181 | ||
;; fsqrtd: latency 31.  Issues on pipe 1 together with the divider, then
;; holds the divider for 27 more cycles.
(define_insn_reservation "thunderx_fsqrtd" 31
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
2d41ed58 AP |
186 | |
;; Rounding and conversion inside the FP unit (f_cvt, f_rints, f_rintd)
;; takes 4 cycles, pipe 1 only.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
  "thunderx_pipe1")
192 | ||
;; Float <-> integer conversion that includes a move between the integer
;; and FP register files takes 6 cycles, pipe 1 only.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")
198 | ||
;; FP/SIMD loads and stores happen in pipe 0.
;; 64-bit register/pair loads take 4 cycles from L1.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
                        neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")
206 | ||
;; A 128-bit load pair is single issued (claims both pipes) and takes
;; 4 cycles from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")
212 | ||
;; FP/SIMD stores take one cycle in pipe 0.
;; ST1 with one register, either multiple structures or a single
;; structure, is also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
                        neon_store1_one_lane, neon_store1_one_lane_q"))
  "thunderx_pipe0")
221 | ||
;; 64-bit NEON store pairs are single issued: both pipes for one cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")
227 | ||
;; 128-bit NEON store pairs are single issued and hold both pipes for
;; two cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")
233 | ||
ed9fa8d2 AP |
;; LD1R/LD1 (with a single struct) takes 6 cycles and is issued in pipe 0.
(define_insn_reservation "thunderx_neon_ld1" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_all_lanes"))
  "thunderx_pipe0")
2d41ed58 AP |
239 | |
240 | ;; SIMD/NEON (q forms take an extra cycle) | |
ed9fa8d2 | 241 | ;; SIMD For ThunderX is 64bit wide, |
2d41ed58 | 242 | |
ed9fa8d2 AP |
;; ThunderX SIMD move instruction types - 2/3 cycles.
;; dup and ins are the same, as are the SIMD fabs/fneg types.
;; This is the 2-cycle form: pipe 1 plus the SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
                        neon_fp_compare_d, neon_move, neon_dup, \
                        neon_ins, neon_from_gp, neon_to_gp, \
                        neon_abs, neon_neg, \
                        neon_fp_neg_s, neon_fp_abs_s"))
  "thunderx_pipe1 + thunderx_simd")
254 | ||
;; Q forms of the SIMD move group: latency 3.  Pipe 1 plus the SIMD unit
;; in the issue cycle, then the SIMD unit alone for one more cycle.
(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
                        neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
                        neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
                        neon_abs_q, neon_neg_q, \
                        neon_fp_neg_s_q, neon_fp_neg_d_q, \
                        neon_fp_abs_s_q, neon_fp_abs_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
264 | ||
;; ThunderX SIMD simple/add instruction types - 4/5 cycles.
;; This is the 4-cycle form: pipe 1 plus the SIMD unit for one cycle.
;; Each type is listed once; a repeated neon_reduc_add entry was dropped,
;; which leaves the set of matched types unchanged.

(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
                        neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
                        neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
                        neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
                        neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
                        neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
                        neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
                        neon_fp_reduc_minmax_s"))
  "thunderx_pipe1 + thunderx_simd")
278 | ||
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form of their
;; type, which is incorrect; they are listed with the q forms below.
;; Q forms of the SIMD simple/add group: latency 5.  Pipe 1 plus the SIMD
;; unit in the issue cycle, then the SIMD unit alone for one more cycle.
;; Each type is listed once; a repeated neon_reduc_add_q entry was
;; dropped, which leaves the set of matched types unchanged.

(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
                        neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
                        neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
                        neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
                        neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
                        neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
                        neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
                        neon_arith_acc_q, neon_rev_q, \
                        neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
293 | ||
ed9fa8d2 AP |
;; Multiplies (float and integer), shifts, permutes (except for TBL) and
;; float conversions are 6/7 cycles.  This is the 6-cycle form: pipe 1
;; plus the SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_mult" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
                        neon_mla_b, neon_mla_h, neon_mla_s, \
                        neon_mla_h_scalar, neon_mla_s_scalar, \
                        neon_ext, neon_shift_imm, neon_permute, \
                        neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
                        neon_sat_shift_reg, neon_shift_acc, \
                        neon_mul_b, neon_mul_h, neon_mul_s, \
                        neon_mul_h_scalar, neon_mul_s_scalar, \
                        neon_fp_mul_s_scalar, \
                        neon_fp_mla_s_scalar"))
  "thunderx_pipe1 + thunderx_simd")
309 | ||
;; Q forms of the SIMD multiply/shift/permute/conversion group, together
;; with the long, widen and narrow types listed below: latency 7.  Pipe 1
;; plus the SIMD unit in the issue cycle, then the SIMD unit alone for one
;; more cycle.
(define_insn_reservation "thunderx_neon_mult_q" 7
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
                        neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
                        neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
                        neon_ext_q, neon_shift_imm_q, neon_permute_q, \
                        neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
                        neon_sat_shift_reg_q, neon_shift_acc_q, \
                        neon_shift_imm_long, \
                        neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
                        neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
                        neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
                        neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
                        neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
                        neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
326 | ||
327 | ||
;; AES[ED] is 5 cycles.  Pipe 1 plus the SIMD unit in the issue cycle,
;; then the SIMD unit alone for one more cycle.
(define_insn_reservation "thunderx_crypto_aese" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aese"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
2d41ed58 | 333 | |
ed9fa8d2 AP |
;; AES{,I}MC is 3 cycles.  Pipe 1 plus the SIMD unit in the issue cycle,
;; then the SIMD unit alone for one more cycle.
(define_insn_reservation "thunderx_crypto_aesmc" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aesmc"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
339 | ||
340 | ||
;; Thunder 128-bit SIMD reads the upper half in cycle 2 and writes the
;; upper half in the last cycle, so between two q-form SIMD instructions
;; the latency is one cycle less than the producer's default (2, 4 and 6
;; instead of 3, 5 and 7).
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
345 | ||
;; 64-bit TBL is emulated and takes 160 cycles; both pipes are held for
;; the whole time.
(define_insn_reservation "thunderx_tbl" 160
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1"))
  "(thunderx_pipe1+thunderx_pipe0)*160")
351 | ||
;; 128-bit TBL is emulated and takes 320 cycles; both pipes are held for
;; the whole time.
(define_insn_reservation "thunderx_tblq" 320
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1_q"))
  "(thunderx_pipe1+thunderx_pipe0)*320")
2d41ed58 AP |
357 | |
;; Untyped and multiple-instruction patterns are assumed to need both
;; pipes for one cycle.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")
365 | ||
366 |