]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/arm/exynos-m1.md
re PR c++/69753 (bogus: expected primary-expression before ‘>’ token)
[thirdparty/gcc.git] / gcc / config / arm / exynos-m1.md
CommitLineData
;; Samsung Exynos M1 pipeline description
;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
19
;; Exynos M1 view of the generic "type" attribute for Advanced SIMD and
;; FP insns.  Each clause of the COND below folds a set of "type" values
;; into one scheduling class; any type not listed yields "unknown".
;;
;; The following declared values are never selected by the COND in this
;; definition: neon_mla_q, neon_mla_long, neon_shift_reg_basic_q,
;; neon_shift_reg_complex_q, neon_fp_minmax, neon_fp_mul_q,
;; neon_fp_mla_q and neon_move.
(define_attr "exynos_m1_neon_type"
  "neon_arith_simple, neon_arith_basic, neon_arith_complex,
   neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
   neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q,
   neon_shift_reg_complex, neon_shift_reg_complex_q,
   neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
   neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
   neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
   neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
   neon_bitops, neon_bitops_q, neon_bitins,
   neon_to_gp, neon_from_gp, neon_move, neon_tbl,
   neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
   neon_load1_one, neon_load1_all,
   neon_load2_2, neon_load2_one, neon_load2_all,
   neon_load3_3, neon_load3_one, neon_load3_all,
   neon_load4_4, neon_load4_one, neon_load4_all,
   neon_store,
   neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
   neon_store2_2, neon_store2_one,
   neon_store3_3, neon_store3_one,
   neon_store4_4, neon_store4_one,
   unknown"
  (cond [
    ;; Integer arithmetic.
    (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
                     neon_abs, neon_abs_q,\
                     neon_minmax, neon_minmax_q")
      (const_string "neon_arith_simple")

    (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                     neon_neg, neon_neg_q,\
                     neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
                     neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
                     neon_compare_zero, neon_compare_zero_q")
      (const_string "neon_arith_basic")

    (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
                     neon_reduc_add, neon_reduc_add_q,\
                     neon_reduc_add_acc, neon_reduc_add_acc_q,\
                     neon_reduc_add_long, neon_add_halve_narrow_q,\
                     neon_add_halve, neon_add_halve_q,\
                     neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                     neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                     neon_qneg_q, neon_qsub, neon_qsub_q,\
                     neon_sub_halve_narrow_q,\
                     neon_compare, neon_compare_q,\
                     neon_reduc_minmax, neon_reduc_minmax_q")
      (const_string "neon_arith_complex")

    ;; Integer multiply and multiply-accumulate.
    (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
                     neon_mul_s, neon_mul_s_q,\
                     neon_mul_h_scalar, neon_mul_h_scalar_q,\
                     neon_mul_s_scalar, neon_mul_s_scalar_q,\
                     neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                     neon_sat_mul_b, neon_sat_mul_b_q,\
                     neon_sat_mul_h, neon_sat_mul_h_q,\
                     neon_sat_mul_s, neon_sat_mul_s_q,\
                     neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
                     neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
                     neon_sat_mul_b_long, neon_sat_mul_h_long,\
                     neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                     neon_sat_mul_s_scalar_long")
      (const_string "neon_multiply")

    (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                     neon_mla_h_scalar, neon_mla_s_scalar,\
                     neon_mla_b_long, neon_mla_h_long,\
                     neon_mla_s_long,\
                     neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
                     neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                     neon_mla_h_scalar_q, neon_mla_s_scalar_q")
      (const_string "neon_mla")

    (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                     neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                     neon_sat_mla_s_scalar_long")
      (const_string "neon_sat_mla_long")

    ;; Integer shifts.
    (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
      (const_string "neon_shift_acc")

    (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                     neon_shift_imm_narrow_q, neon_shift_imm_long")
      (const_string "neon_shift_imm_basic")

    (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                     neon_sat_shift_imm_narrow_q")
      (const_string "neon_shift_imm_complex")

    (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
      (const_string "neon_shift_reg_basic")

    (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
      (const_string "neon_shift_reg_complex")

    ;; Floating point.
    (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                     neon_fp_abs_s, neon_fp_abs_s_q,\
                     neon_fp_neg_d, neon_fp_neg_d_q,\
                     neon_fp_abs_d, neon_fp_abs_d_q")
      (const_string "neon_fp_unary")

    (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
                     neon_fp_addsub_d, neon_fp_addsub_d_q")
      (const_string "neon_fp_add")

    (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
                     neon_fp_abd_d, neon_fp_abd_d_q")
      (const_string "neon_fp_abd")

    ;; Note that the FP min/max types are classed as compares here.
    (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
                     neon_fp_compare_d, neon_fp_compare_d_q,\
                     neon_fp_minmax_s, neon_fp_minmax_s_q,\
                     neon_fp_minmax_d, neon_fp_minmax_d_q")
      (const_string "neon_fp_compare")

    (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
                     neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
      (const_string "neon_fp_reduc_minmax")

    (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
                     neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
      (const_string "neon_fp_reduc_add")

    (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
                     neon_fp_round_d, neon_fp_round_d_q")
      (const_string "neon_fp_round")

    (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
                     neon_fp_to_int_s, neon_fp_to_int_s_q,\
                     neon_fp_to_int_d_q, neon_fp_to_int_d,\
                     neon_int_to_fp_s, neon_int_to_fp_s_q,\
                     neon_int_to_fp_d, neon_int_to_fp_d_q")
      (const_string "neon_fp_cvt")

    (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
                     neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
                     neon_fp_mul_d, neon_fp_mul_d_q,\
                     neon_fp_mul_d_scalar_q")
      (const_string "neon_fp_mul")

    (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
                     neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
                     neon_fp_mla_d, neon_fp_mla_d_q,\
                     neon_fp_mla_d_scalar_q")
      (const_string "neon_fp_mla")

    (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
                     neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                     neon_fp_recpe_d, neon_fp_recpe_d_q,\
                     neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
      (const_string "neon_fp_estimate")

    (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
                     neon_fp_recpx_d, neon_fp_recpx_d_q")
      (const_string "neon_fp_estimatex")

    (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
                     neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                     neon_fp_recps_d, neon_fp_recps_d_q,\
                     neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
      (const_string "neon_fp_step")

    ;; Bit manipulation, permutes and moves.
    (eq_attr "type" "neon_rbit, neon_rbit_q,\
                     neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
                     neon_dup, neon_dup_q,\
                     neon_rev, neon_rev_q,\
                     neon_move, neon_move_q,\
                     neon_ext, neon_permute, neon_zip")
      (const_string "neon_bitops")

    (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
      (const_string "neon_bitops_q")

    (eq_attr "type" "neon_bsl, neon_bsl_q")
      (const_string "neon_bitins")

    (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
      (const_string "neon_tbl")

    ;; Transfers between the general purpose and the SIMD/FP registers.
    (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
      (const_string "neon_from_gp")

    (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
      (const_string "neon_to_gp")

    ;; Structure loads.
    (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
      (const_string "neon_load1_1")

    (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
      (const_string "neon_load1_2")

    (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
      (const_string "neon_load1_3")

    (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
      (const_string "neon_load1_4")

    (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
      (const_string "neon_load1_one")

    (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
      (const_string "neon_load1_all")

    (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
                     neon_load2_4reg, neon_load2_4reg_q")
      (const_string "neon_load2_2")

    (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
      (const_string "neon_load2_one")

    (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
      (const_string "neon_load2_all")

    (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
      (const_string "neon_load3_3")

    (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
      (const_string "neon_load3_one")

    (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
      (const_string "neon_load3_all")

    (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
      (const_string "neon_load4_4")

    (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
      (const_string "neon_load4_one")

    (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
      (const_string "neon_load4_all")

    ;; Stores.
    (eq_attr "type" "f_stores, f_stored,\
                     neon_stp, neon_stp_q")
      (const_string "neon_store")

    (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
      (const_string "neon_store1_1")

    (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
      (const_string "neon_store1_2")

    (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
      (const_string "neon_store1_3")

    (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
      (const_string "neon_store1_4")

    (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
      (const_string "neon_store1_one")

    (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
                     neon_store2_4reg, neon_store2_4reg_q")
      (const_string "neon_store2_2")

    (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
      (const_string "neon_store2_one")

    (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
      (const_string "neon_store3_3")

    (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
      (const_string "neon_store3_one")

    (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
      (const_string "neon_store4_4")

    (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
      (const_string "neon_store4_one")]

    ;; Anything else is not classified.
    (const_string "unknown")))
290
;; Scheduling model: the Exynos M1 core is described as a triple issue
;; pipeline built from the functional units declared below.  Integer and
;; branch units, load-store units and Neon/FP units each get their own
;; automaton.

(define_automaton "exynos_m1_gp")
(define_automaton "exynos_m1_ls")
(define_automaton "exynos_m1_fp")

;; 1. Pipelines A and B execute simple integer operations.
;; 2. Pipeline C executes simple or complex integer operations.

(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")

;; Any of the three integer pipelines.
(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
;; Pipeline C only.
(define_reservation "em1_c" "em1_xc")

;; 3. Pipelines F0 and F1 execute Neon and FP operations; they are
;;    asymmetric, so each class of operation is tied to F0, F1 or either.

(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")

;; Either pipeline.
(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")

;; F0 only.
(define_reservation "em1_fmac" "em1_f0")
(define_reservation "em1_fcvt" "em1_f0")
(define_reservation "em1_nalu0" "em1_f0")
(define_reservation "em1_nmisc" "em1_f0")
(define_reservation "em1_ncrypt" "em1_f0")

;; F1 only.
(define_reservation "em1_nalu1" "em1_f1")
(define_reservation "em1_fadd" "em1_f1")
(define_reservation "em1_fvar" "em1_f1")
(define_reservation "em1_fst" "em1_f1")

;; 4. Pipeline BX executes branch operations.

(define_cpu_unit "em1_bx" "exynos_m1_gp")

(define_reservation "em1_br" "em1_bx")

;; 5. L is the AGU for loads; S is the AGU for stores and SD is the
;;    pipeline for store data.

(define_cpu_unit "em1_lx, em1_sx, em1_sd" "exynos_m1_ls")

(define_reservation "em1_ld" "em1_lx")
;; A store takes both S and SD in the same cycle.
(define_reservation "em1_st" "(em1_sx + em1_sd)")

;; Frequently used combinations of the FP store pipeline with the
;; store or load units.
(define_reservation "em1_sfst" "(em1_fst + em1_st)")
(define_reservation "em1_lfst" "(em1_fst + em1_ld)")
339
;; Branches and calls.
;;
;; A branch produces no result, hence its latency of zero.
;; TODO: unconditional branches use no units;
;;       conditional branches add the BX unit;
;;       indirect branches add the C unit.
(define_insn_reservation "exynos_m1_branch" 0
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "branch"))
  "em1_br")

;; Calls are issued to any integer pipeline.
(define_insn_reservation "exynos_m1_call" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "call"))
  "em1_alu")
355
;; Basic integer ALU operations.
;;
;; Non-predicated simple ALU operations without a shift: one cycle on
;; any integer pipeline.
(define_insn_reservation "exynos_m1_alu" 1
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, clz, rbit, rev, csel, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_alu")

;; Predicated simple ALU operations without a shift: restricted to
;; pipeline C.  The type list matches the one above minus "csel".
(define_insn_reservation "exynos_m1_alu_p" 1
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, clz, rbit, rev, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_c")

;; ALU operations with a shift by immediate.
;; TODO: a shift amount between 0 and 3 costs just 1 cycle; any other
;;       amount costs 2 cycles and blocks the unit.  For now, use the
;;       latency of the latter and the units of the former.
(define_insn_reservation "exynos_m1_alu_shift" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "alu_ext, alus_ext,\
                        alu_shift_imm, alus_shift_imm,\
                        logic_shift_imm, logics_shift_imm,\
                        mov_shift, mvn_shift"))
  "(em1_alu)")

;; Non-predicated ALU operations with a shift by register.
(define_insn_reservation "exynos_m1_alu_shift_reg" 2
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu * 2)")

;; Predicated ALU operations with a shift by register: any integer
;; pipeline first, then pipeline C.
(define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu, em1_c)")
415
;; Integer multiplication, selected through the "mul32" and "mul64"
;; attributes rather than through "type".
(define_insn_reservation "exynos_m1_mla" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul32" "yes"))
  "em1_c")

(define_insn_reservation "exynos_m1_mlal" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul64" "yes"))
  "em1_alu, em1_c")

;; Integer division.
;; TODO: the median latency is assumed; a division blocks other divisions.
(define_insn_reservation "exynos_m1_div" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "udiv, sdiv"))
  "em1_c")
433
;; Load-store execution unit.
;;
;; Loads of at most 2 words.
(define_insn_reservation "exynos_m1_load" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_byte, load1, load2"))
  "em1_ld")

;; Loads of 3 or 4 words keep the load unit for three cycles.
(define_insn_reservation "exynos_m1_loadm" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load3, load4"))
  "(em1_ld * 3)")

;; Stores of at most 2 words.
(define_insn_reservation "exynos_m1_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store1, store2"))
  "em1_st")

;; Stores of 3 or 4 words keep the store units for three cycles.
(define_insn_reservation "exynos_m1_storem" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store3, store4"))
  "(em1_st * 3)")
459
;; Advanced SIMD unit.
;;
;; Integer arithmetic instructions.

;; NOTE(review): unlike its siblings, this reservation name has no
;; "neon_" component; confirm whether that is intentional before
;; renaming, since bypasses refer to reservations by name.
(define_insn_reservation "exynos_m1_arith_simple" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_arith_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_neon_arith_complex" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
  "em1_nmisc")

;; Integer multiply instructions; plain, accumulating and saturating
;; long accumulating forms share one reservation.

(define_insn_reservation "exynos_m1_neon_multiply" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_multiply, neon_mla, neon_sat_mla_long"))
  "em1_nmisc")
486
;; Integer shift instructions.

;; Shift and accumulate.
(define_insn_reservation "exynos_m1_neon_shift_acc" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
  "em1_nalu1")

;; Basic shifts, by immediate or by register.
(define_insn_reservation "exynos_m1_neon_shift_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_basic, neon_shift_reg_basic"))
  "em1_nalu")

;; Complex shifts, by immediate or by register.
(define_insn_reservation "exynos_m1_neon_shift_complex" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_complex, neon_shift_reg_complex"))
  "em1_nalu1")
508
;; Advanced SIMD floating point instructions.

(define_insn_reservation "exynos_m1_neon_fp_unary" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_neon_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
  "em1_fadd")

(define_insn_reservation "exynos_m1_neon_fp_abd" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_fp_compare" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
  "em1_nmisc")

;; TODO: the latency of reduce insns actually varies between 3 and 5,
;;       and their throughput between 1/4 and 1; the median values
;;       were picked.
(define_insn_reservation "exynos_m1_neon_fp_reduc" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
  "(em1_nmisc * 4)")

(define_insn_reservation "exynos_m1_neon_fp_reduc_add" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
  "((em1_nalu * 2), em1_fadd)")

(define_insn_reservation "exynos_m1_neon_fp_round" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
  "em1_fcvt")

(define_insn_reservation "exynos_m1_neon_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
  "em1_fcvt")

(define_insn_reservation "exynos_m1_neon_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_neon_fp_mla" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_neon_fp_estimate" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
  "em1_fcvt")

(define_insn_reservation "exynos_m1_neon_fp_estimatex" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_fp_step" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
  "em1_fmac")
590
;; Miscellaneous Advanced SIMD instructions.

(define_insn_reservation "exynos_m1_neon_bitops" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops"))
  "em1_nalu")

;; The Q forms reserve a Neon ALU in two consecutive cycles.
(define_insn_reservation "exynos_m1_neon_bitops_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
  "(em1_nalu, em1_nalu)")

(define_insn_reservation "exynos_m1_neon_bitins" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitins"))
  "em1_nalu1")

;; TODO: table lookups are more complicated than this.
(define_insn_reservation "exynos_m1_neon_tbl" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_tbl"))
  "em1_nalu1")

;; Transfers from the general purpose registers use the store units.
(define_insn_reservation "exynos_m1_neon_from_gp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
  "em1_st")

;; Transfers to the general purpose registers use the FP store pipeline
;; together with the load unit.
(define_insn_reservation "exynos_m1_neon_to_gp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
  "em1_lfst")
629
;; Advanced SIMD and FP load instructions.

;; FP loads and D-register load pairs are matched on "type" directly.
(define_insn_reservation "exynos_m1_neon_load" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
  "em1_ld")

;; Q-register load pairs.
(define_insn_reservation "exynos_m1_neon_load_q" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_ldp_q"))
  "(em1_ld, em1_ld)")

;; Single-element structure loads.
(define_insn_reservation "exynos_m1_neon_load1_1" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
  "em1_ld")

(define_insn_reservation "exynos_m1_neon_load1_2" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
  "(em1_ld * 2)")

(define_insn_reservation "exynos_m1_neon_load1_3" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
  "(em1_ld * 3)")

(define_insn_reservation "exynos_m1_neon_load1_4" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
  "(em1_ld * 4)")

(define_insn_reservation "exynos_m1_neon_load1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
  "((em1_ld * 2), em1_nalu)")

;; Two-element structure loads.
(define_insn_reservation "exynos_m1_neon_load2_2" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
  "(em1_ld * 5)")

(define_insn_reservation "exynos_m1_neon_load2_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
  "((em1_ld * 2), (em1_nalu * 2))")

(define_insn_reservation "exynos_m1_neon_load2_all" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
  "(em1_ld * 2)")

;; Three-element structure loads.
(define_insn_reservation "exynos_m1_neon_load3_3" 12
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
  "(em1_ld * 6)")

(define_insn_reservation "exynos_m1_neon_load3_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
  "((em1_ld * 4), (em1_nalu * 3))")

(define_insn_reservation "exynos_m1_neon_load3_all" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
  "(em1_ld * 3)")

;; Four-element structure loads.
(define_insn_reservation "exynos_m1_neon_load4_4" 14
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
  "(em1_ld * 7)")

(define_insn_reservation "exynos_m1_neon_load4_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
  "((em1_ld * 4), (em1_nalu * 4))")

(define_insn_reservation "exynos_m1_neon_load4_all" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
  "(em1_ld * 4)")
727
;; Advanced SIMD and FP store instructions.

;; FP stores and store pairs: the FP store pipeline first, then the
;; store units.
(define_insn_reservation "exynos_m1_neon_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store"))
  "(em1_fst, em1_st)")

;; Single-element structure stores of 1 to 4 registers reserve the
;; combined FP store and store units once per register.
(define_insn_reservation "exynos_m1_neon_store1_1" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
  "em1_sfst")

(define_insn_reservation "exynos_m1_neon_store1_2" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
  "(em1_sfst * 2)")

(define_insn_reservation "exynos_m1_neon_store1_3" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
  "(em1_sfst * 3)")

(define_insn_reservation "exynos_m1_neon_store1_4" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
  "(em1_sfst * 4)")

(define_insn_reservation "exynos_m1_neon_store1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
  "(em1_fst, em1_st)")

;; For 2-, 3- and 4-element structure stores, the multi-register and
;; the single-lane forms share a reservation.
(define_insn_reservation "exynos_m1_neon_store2" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
  "em1_sfst, em1_fst")

(define_insn_reservation "exynos_m1_neon_store3" 16
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
  "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")

(define_insn_reservation "exynos_m1_neon_store4" 17
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
  "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")
783
;; Scalar floating-point operations, matched on "type" directly.

(define_insn_reservation "exynos_m1_fp_const" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fconsts, fconstd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fadds, faddd"))
  "em1_fadd")

(define_insn_reservation "exynos_m1_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmuls, fmuld"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_fp_mac" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
  "em1_fmac")

;; Conversions and roundings that stay in the FP registers.
(define_insn_reservation "exynos_m1_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvt, f_rints, f_rintd"))
  "em1_fcvt")

;; FP to integer conversion: convert, then FP store pipeline plus load unit.
(define_insn_reservation "exynos_m1_fp_cvt_i" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvtf2i"))
  "(em1_fcvt, em1_lfst)")

;; Integer to FP conversion: store units, then convert.
(define_insn_reservation "exynos_m1_i_cvt_fp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvti2f"))
  "(em1_st, em1_fcvt)")

(define_insn_reservation "exynos_m1_fp_cmp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcmps, fcmpd"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_fp_sel" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcsel"))
  "(em1_st + em1_nalu0)")

(define_insn_reservation "exynos_m1_fp_arith" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "ffariths, ffarithd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_cpy" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmov"))
  "em1_nalu")

;; Division and square root, scalar as well as Advanced SIMD, in single
;; and then in double precision.
(define_insn_reservation "exynos_m1_fp_divs" 15
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
                        fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
  "(em1_fvar * 9)")

(define_insn_reservation "exynos_m1_fp_divd" 22
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
                        fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
  "(em1_fvar * 9)")

(define_insn_reservation "exynos_m1_fp_minmax" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_minmaxs, f_minmaxd"))
  "(em1_nmisc * 2)")
857
;; Crypto operations.

(define_insn_reservation "exynos_m1_crypto_simple" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crypto_aese, crypto_aesmc,\
                        crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_complex" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
  "em1_ncrypt")

;; The long multiply types are scheduled on the crypto reservation.
(define_insn_reservation "exynos_m1_crypto_poly" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_polyl" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_mul_d_long"))
  "em1_ncrypt")

;; CRC runs on integer pipeline C.
(define_insn_reservation "exynos_m1_crc" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crc"))
  "em1_c")
885
;; Bypasses between simple execution units.

;; Pre-decrement and post-increment addressing modes update the base
;; register quickly.
;; TODO: find a way to tell the addressing mode register from the
;;       loaded one.
(define_bypass 1 "exynos_m1_store*" "exynos_m1_store*")

;; A multiply-accumulate can feed another one quickly.
(define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")

;; Insns in FMAC or FADD can feed other such insns quickly; first the
;; FMAC producers, scalar then Advanced SIMD.
(define_bypass 4 "exynos_m1_fp_mul"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 5 "exynos_m1_fp_mac"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 4 "exynos_m1_neon_fp_mul"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
(define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

;; Then the FADD producers.
(define_bypass 3 "exynos_m1_fp_add"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 3 "exynos_m1_neon_fp_add"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

;; Insns in NALU can feed other such insns quickly.
(define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
                 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
                  exynos_m1_fp_sel")
(define_bypass 3 "exynos_m1_fp_sel"
                 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
                  exynos_m1_fp_sel")
(define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl"
                 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
                  exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl")
(define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
                 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
                  exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl")
(define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary")

;; Insns in NCRYPT can feed other such insns quickly.
(define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")
(define_bypass 3 "exynos_m1_crypto_polyl"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")
(define_bypass 5 "exynos_m1_crypto_complex"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")

;; Predicted branches take no time, whereas mispredicted ones take
;; forever anyway.
(define_bypass 1 "exynos_m1_*"
                 "exynos_m1_call, exynos_m1_branch")