]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/thunderx.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / thunderx.md
CommitLineData
2d41ed58 1;; Cavium ThunderX pipeline description
8d9254fc 2;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
2d41ed58
AP
3;;
4;; Written by Andrew Pinski <apinski@cavium.com>
5
6;; This file is part of GCC.
7
8;; GCC is free software; you can redistribute it and/or modify
9;; it under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 3, or (at your option)
11;; any later version.
12
13;; GCC is distributed in the hope that it will be useful,
14;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;; GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public License
19;; along with GCC; see the file COPYING3. If not see
20;; <http://www.gnu.org/licenses/>.
2d41ed58
AP
21
22
23;; Thunder is a dual-issue processor that can issue all instructions on
24;; pipe0 and a subset on pipe1.
25
26
;; Declare the four automata used by this description.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")

;; Both issue pipes live in the main automaton.
(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")

;; Each remaining automaton holds a single unit of the same name.
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")
34
;; Plain ALU, logical, extend and move operations: latency 1, issued on
;; either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")
39
;; Bitfield, rotate, shift and bit/byte-reverse operations: latency 1,
;; issued on either pipe.
;; "extend" is deliberately not listed here: it is already matched by
;; thunderx_add, which has the same latency and reservation, so a second
;; entry would never be selected.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,bfx,rotate_imm,shift_imm,shift_reg,rbit,rev"))
  "thunderx_pipe0 | thunderx_pipe1")
44
45
;; Arithmetic instructions with an extra shift or extend take two cycles.
;; FIXME: This needs more attributes on aarch64 than what is currently there;
;; this is conservative for now.
;; Strictly, the extra cycle applies only when the shift is not
;; (LSL && shift by 0/1/2/3) and when the extend is not a zero extend.

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")
56
;; Conditional select: latency 2, issued on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")
61
;; Multiply, multiply-accumulate and count-leading-zeros can only issue on
;; pipe 1.  Latency 4; pipe 1 and the multiply unit are held together.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")
68
;; crcb, crch and crcw take 4 cycles and can only issue on pipe 1; they
;; reserve pipe 1 together with the multiply unit.

(define_insn_reservation "thunderx_crc32" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crc"))
  "thunderx_pipe1 + thunderx_mult")
75
;; crcx is 5 cycles and can only happen on pipe 1
77;(define_insn_reservation "thunderx_crc64" 5
2d41ed58 78; (and (eq_attr "tune" "thunderx")
ed9fa8d2
AP
79; (eq_attr "type" "crc")
80; (eq_attr "mode" "DI"))
2d41ed58
AP
81; "thunderx_pipe1 + thunderx_mult")
82
;; Integer divide: latency 22.  Pipe 1 and the divide unit are taken in the
;; issue cycle, then the divide unit alone stays busy for 21 more cycles.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
87
88;(define_insn_reservation "thunderx_div64" 38
89; (and (eq_attr "tune" "thunderx")
90; (eq_attr "type" "udiv,sdiv")
91; (eq_attr "mode" "DI"))
92; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
93
;; A single-register store occupies pipe 0 for one cycle.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_4"))
  "thunderx_pipe0")
99
;; Store pairs are single issued: both pipes are reserved for the cycle.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_8,store_16"))
  "thunderx_pipe0 + thunderx_pipe1")
105
ed9fa8d2
AP
106;; Prefetch are single issued
107;(define_insn_reservation "thunderx_prefetch" 1
108; (and (eq_attr "tune" "thunderx")
109; (eq_attr "type" "prefetch"))
110; "thunderx_pipe0 + thunderx_pipe1")
2d41ed58
AP
111
;; Loads and load pairs that hit L1 have a 3-cycle latency and issue on
;; pipe 0.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load_4, load_8, load_16"))
  "thunderx_pipe0")
117
;; Branches, traps and calls: latency 1, pipe 1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")
122
123;; FPU
124
;; Scalar FP add (single and double): latency 4, pipe 1 only.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")
129
;; FP constant loads: latency 1, pipe 1 only.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")
134
;; FP register-to-register moves, and FP min/max, take 2 cycles on pipe 1.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
  "thunderx_pipe1")
140
;; FP ABS and NEG complete in a single cycle on pipe 1.
(define_insn_reservation "thunderx_fabs" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "ffariths,ffarithd"))
  "thunderx_pipe1")
146
;; FP conditional select: latency 3, pipe 1 only.
(define_insn_reservation "thunderx_fcsel" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcsel"))
  "thunderx_pipe1")
151
;; Types f_mrc and f_mcr: latency 2, pipe 1 only.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")
156
ed9fa8d2
AP
;; FP compare and conditional compare: latency 3, pipe 1 only.
(define_insn_reservation "thunderx_fcmp" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx_pipe1")
161
2d41ed58
AP
;; Scalar FP multiply and multiply-accumulate: latency 6, pipe 1 only.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")
166
;; Single-precision FP divide: latency 12.  Issues on pipe 1 with the divide
;; unit, which then remains reserved for 8 further cycles.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
171
;; Double-precision FP divide: latency 22.  Issues on pipe 1 with the divide
;; unit, which then remains reserved for 18 further cycles.
(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
176
;; Single-precision FP square root: latency 17.  Issues on pipe 1 with the
;; divide unit, which then remains reserved for 13 further cycles.
(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
181
;; Double-precision FP square root: latency 31.  Issues on pipe 1 with the
;; divide unit, which then remains reserved for 27 further cycles.
(define_insn_reservation "thunderx_fsqrtd" 31
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
2d41ed58
AP
186
;; Rounding and conversions that stay within the FP registers take 4 cycles.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
  "thunderx_pipe1")
192
;; Float <-> integer conversions that also move between the integer and FP
;; register files take 6 cycles.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")
198
;; FP/SIMD loads and stores issue on pipe 0.
;; 64-bit register and register-pair loads have a 4-cycle latency from L1.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
			neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")
206
;; A 128-bit load pair is single issued (both pipes reserved) and has a
;; 4-cycle latency from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")
212
;; FP/SIMD stores take one cycle on pipe 0.
;; ST1 of one register, whether multiple structures or a single structure,
;; is also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
			neon_store1_one_lane, neon_store1_one_lane_q"))
  "thunderx_pipe0")
221
;; A 64-bit NEON store pair is single issued: both pipes for one cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")
227
;; A 128-bit NEON store pair is single issued and holds both pipes for two
;; consecutive cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")
233
ed9fa8d2
AP
;; LD1R/LD1 (with a single struct) has a 6-cycle latency and issues on
;; pipe 0.
(define_insn_reservation "thunderx_neon_ld1" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_all_lanes"))
  "thunderx_pipe0")
2d41ed58
AP
239
240;; SIMD/NEON (q forms take an extra cycle)
;; SIMD for ThunderX is 64 bits wide.
2d41ed58 242
ed9fa8d2
AP
;; ThunderX SIMD move instruction types - 2 cycles for the 64-bit forms,
;; 3 cycles for the q forms.
;; dup and ins cost the same, as do the SIMD fabs/fneg instruction types.
;; 64-bit forms: pipe 1 plus the SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
			neon_fp_compare_d, neon_move, neon_dup, \
			neon_ins, neon_from_gp, neon_to_gp, \
			neon_abs, neon_neg, \
			neon_fp_neg_s, neon_fp_abs_s"))
  "thunderx_pipe1 + thunderx_simd")
254
;; q forms of the SIMD move group: latency 3.  Pipe 1 and the SIMD unit are
;; taken in the issue cycle and the SIMD unit is held one more cycle.
(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
			neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
			neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
			neon_abs_q, neon_neg_q, \
			neon_fp_neg_s_q, neon_fp_neg_d_q, \
			neon_fp_abs_s_q, neon_fp_abs_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
264
;; ThunderX SIMD simple/add instruction types - 4 cycles for the 64-bit
;; forms, 5 cycles for the q forms.

;; 64-bit forms: pipe 1 plus the SIMD unit for one cycle.
;; Each type is listed exactly once (neon_reduc_add used to appear twice).
(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
			neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
			neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
			neon_fp_reduc_minmax_s"))
  "thunderx_pipe1 + thunderx_simd")
278
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form, which is
;; incorrect; they are therefore costed with the q forms here.

;; q forms of the SIMD simple/add group: latency 5.  Pipe 1 and the SIMD
;; unit are taken in the issue cycle and the SIMD unit is held one more
;; cycle.  Each type is listed exactly once (neon_reduc_add_q used to
;; appear twice).
(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
			neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
			neon_arith_acc_q, neon_rev_q, \
			neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
293
ed9fa8d2
AP
;; Multiplies (float and integer), shifts, permutes (except for TBL) and
;; float conversions are 6 cycles for the 64-bit forms and 7 cycles for the
;; q forms.
;; 64-bit forms: pipe 1 plus the SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_mult" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
			neon_mla_b, neon_mla_h, neon_mla_s, \
			neon_mla_h_scalar, neon_mla_s_scalar, \
			neon_ext, neon_shift_imm, neon_permute, \
			neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
			neon_sat_shift_reg, neon_shift_acc, \
			neon_mul_b, neon_mul_h, neon_mul_s, \
			neon_mul_h_scalar, neon_mul_s_scalar, \
			neon_fp_mul_s_scalar, \
			neon_fp_mla_s_scalar"))
  "thunderx_pipe1 + thunderx_simd")
309
;; q forms of the SIMD multiply/shift/permute/convert group, together with
;; the long, widen and narrow types listed below: latency 7.  Pipe 1 and
;; the SIMD unit are taken in the issue cycle and the SIMD unit is held one
;; more cycle.
(define_insn_reservation "thunderx_neon_mult_q" 7
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
			neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
			neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
			neon_ext_q, neon_shift_imm_q, neon_permute_q, \
			neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
			neon_sat_shift_reg_q, neon_shift_acc_q, \
			neon_shift_imm_long, \
			neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
			neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
			neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
			neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
			neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
			neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
326
327
;; AES[ED] takes 5 cycles: pipe 1 plus the SIMD unit at issue, then the
;; SIMD unit for one more cycle.
(define_insn_reservation "thunderx_crypto_aese" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aese"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
2d41ed58 333
ed9fa8d2
AP
;; AES{,I}MC takes 3 cycles: pipe 1 plus the SIMD unit at issue, then the
;; SIMD unit for one more cycle.
(define_insn_reservation "thunderx_crypto_aesmc" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aesmc"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
339
340
;; Thunder 128-bit SIMD reads the upper half in cycle 2 and writes the upper
;; half in the last cycle, so a q-form result feeding another q-form
;; instruction is available one cycle earlier than the default latency.
(define_bypass 2 "thunderx_neon_move_q"
  "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 4 "thunderx_neon_add_q"
  "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 6 "thunderx_neon_mult_q"
  "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
345
;; 64-bit TBL is emulated and takes 160 cycles, holding both pipes for the
;; whole time.
(define_insn_reservation "thunderx_tbl" 160
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1"))
  "(thunderx_pipe1+thunderx_pipe0)*160")
351
;; 128-bit TBL is emulated and takes 320 cycles, holding both pipes for the
;; whole time.
(define_insn_reservation "thunderx_tblq" 320
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1_q"))
  "(thunderx_pipe1+thunderx_pipe0)*320")
2d41ed58
AP
357
;; Untyped and multiple-instruction patterns are assumed to need both pipes
;; for one cycle.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")
365
366